1 // Copyright 2014 the V8 project authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 #include "src/codegen/arm64/assembler-arm64-inl.h"
6 #include "src/codegen/arm64/macro-assembler-arm64-inl.h"
7 #include "src/codegen/optimized-compilation-info.h"
8 #include "src/compiler/backend/code-generator-impl.h"
9 #include "src/compiler/backend/code-generator.h"
10 #include "src/compiler/backend/gap-resolver.h"
11 #include "src/compiler/node-matchers.h"
12 #include "src/compiler/osr.h"
13 #include "src/execution/frame-constants.h"
14 #include "src/heap/memory-chunk.h"
15 #include "src/wasm/wasm-code-manager.h"
16 #include "src/wasm/wasm-objects.h"
17 
18 namespace v8 {
19 namespace internal {
20 namespace compiler {
21 
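// In this file, "__" is shorthand for "tasm()->", so "__ Add(...)" emits an
// instruction through the TurboAssembler.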
22 #define __ tasm()->
23 
24 // Adds Arm64-specific methods to convert InstructionOperands.
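// A minimal usage sketch (mirroring the kArm64Add32 case further down):
//   Arm64OperandConverter i(this, instr);
//   __ Add(i.OutputRegister32(), i.InputOrZeroRegister32(0),
//          i.InputOperand2_32(1));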
25 class Arm64OperandConverter final : public InstructionOperandConverter {
26  public:
27   Arm64OperandConverter(CodeGenerator* gen, Instruction* instr)
28       : InstructionOperandConverter(gen, instr) {}
29 
30   DoubleRegister InputFloat32Register(size_t index) {
31     return InputDoubleRegister(index).S();
32   }
33 
34   DoubleRegister InputFloat64Register(size_t index) {
35     return InputDoubleRegister(index);
36   }
37 
38   DoubleRegister InputSimd128Register(size_t index) {
39     return InputDoubleRegister(index).Q();
40   }
41 
42   CPURegister InputFloat32OrZeroRegister(size_t index) {
43     if (instr_->InputAt(index)->IsImmediate()) {
44       DCHECK_EQ(0, bit_cast<int32_t>(InputFloat32(index)));
45       return wzr;
46     }
47     DCHECK(instr_->InputAt(index)->IsFPRegister());
48     return InputDoubleRegister(index).S();
49   }
50 
51   CPURegister InputFloat64OrZeroRegister(size_t index) {
52     if (instr_->InputAt(index)->IsImmediate()) {
53       DCHECK_EQ(0, bit_cast<int64_t>(InputDouble(index)));
54       return xzr;
55     }
56     DCHECK(instr_->InputAt(index)->IsDoubleRegister());
57     return InputDoubleRegister(index);
58   }
59 
60   size_t OutputCount() { return instr_->OutputCount(); }
61 
62   DoubleRegister OutputFloat32Register() { return OutputDoubleRegister().S(); }
63 
64   DoubleRegister OutputFloat64Register() { return OutputDoubleRegister(); }
65 
66   DoubleRegister OutputSimd128Register() { return OutputDoubleRegister().Q(); }
67 
68   Register InputRegister32(size_t index) {
69     return ToRegister(instr_->InputAt(index)).W();
70   }
71 
72   Register InputOrZeroRegister32(size_t index) {
73     DCHECK(instr_->InputAt(index)->IsRegister() ||
74            (instr_->InputAt(index)->IsImmediate() && (InputInt32(index) == 0)));
75     if (instr_->InputAt(index)->IsImmediate()) {
76       return wzr;
77     }
78     return InputRegister32(index);
79   }
80 
81   Register InputRegister64(size_t index) { return InputRegister(index); }
82 
83   Register InputOrZeroRegister64(size_t index) {
84     DCHECK(instr_->InputAt(index)->IsRegister() ||
85            (instr_->InputAt(index)->IsImmediate() && (InputInt64(index) == 0)));
86     if (instr_->InputAt(index)->IsImmediate()) {
87       return xzr;
88     }
89     return InputRegister64(index);
90   }
91 
92   Operand InputOperand(size_t index) {
93     return ToOperand(instr_->InputAt(index));
94   }
95 
96   Operand InputOperand64(size_t index) { return InputOperand(index); }
97 
98   Operand InputOperand32(size_t index) {
99     return ToOperand32(instr_->InputAt(index));
100   }
101 
102   Register OutputRegister64() { return OutputRegister(); }
103 
104   Register OutputRegister32() { return ToRegister(instr_->Output()).W(); }
105 
106   Register TempRegister32(size_t index) {
107     return ToRegister(instr_->TempAt(index)).W();
108   }
109 
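  // Decodes input "index" as the second operand of a data-processing
  // instruction, according to the addressing mode encoded in the opcode:
  // plain register/immediate, shifted register (LSL/LSR/ASR/ROR by an
  // immediate), or extended register (UXTB/UXTH/SXTB/SXTH/SXTW).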
110   Operand InputOperand2_32(size_t index) {
111     switch (AddressingModeField::decode(instr_->opcode())) {
112       case kMode_None:
113         return InputOperand32(index);
114       case kMode_Operand2_R_LSL_I:
115         return Operand(InputRegister32(index), LSL, InputInt5(index + 1));
116       case kMode_Operand2_R_LSR_I:
117         return Operand(InputRegister32(index), LSR, InputInt5(index + 1));
118       case kMode_Operand2_R_ASR_I:
119         return Operand(InputRegister32(index), ASR, InputInt5(index + 1));
120       case kMode_Operand2_R_ROR_I:
121         return Operand(InputRegister32(index), ROR, InputInt5(index + 1));
122       case kMode_Operand2_R_UXTB:
123         return Operand(InputRegister32(index), UXTB);
124       case kMode_Operand2_R_UXTH:
125         return Operand(InputRegister32(index), UXTH);
126       case kMode_Operand2_R_SXTB:
127         return Operand(InputRegister32(index), SXTB);
128       case kMode_Operand2_R_SXTH:
129         return Operand(InputRegister32(index), SXTH);
130       case kMode_Operand2_R_SXTW:
131         return Operand(InputRegister32(index), SXTW);
132       case kMode_MRI:
133       case kMode_MRR:
134       case kMode_Root:
135         break;
136     }
137     UNREACHABLE();
138   }
139 
140   Operand InputOperand2_64(size_t index) {
141     switch (AddressingModeField::decode(instr_->opcode())) {
142       case kMode_None:
143         return InputOperand64(index);
144       case kMode_Operand2_R_LSL_I:
145         return Operand(InputRegister64(index), LSL, InputInt6(index + 1));
146       case kMode_Operand2_R_LSR_I:
147         return Operand(InputRegister64(index), LSR, InputInt6(index + 1));
148       case kMode_Operand2_R_ASR_I:
149         return Operand(InputRegister64(index), ASR, InputInt6(index + 1));
150       case kMode_Operand2_R_ROR_I:
151         return Operand(InputRegister64(index), ROR, InputInt6(index + 1));
152       case kMode_Operand2_R_UXTB:
153         return Operand(InputRegister64(index), UXTB);
154       case kMode_Operand2_R_UXTH:
155         return Operand(InputRegister64(index), UXTH);
156       case kMode_Operand2_R_SXTB:
157         return Operand(InputRegister64(index), SXTB);
158       case kMode_Operand2_R_SXTH:
159         return Operand(InputRegister64(index), SXTH);
160       case kMode_Operand2_R_SXTW:
161         return Operand(InputRegister64(index), SXTW);
162       case kMode_MRI:
163       case kMode_MRR:
164       case kMode_Root:
165         break;
166     }
167     UNREACHABLE();
168   }
169 
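  // Builds a MemOperand for loads/stores: root-register relative, base +
  // immediate (MRI), base + register (MRR), or base + scaled index register
  // (Operand2_R_LSL_I).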
170   MemOperand MemoryOperand(size_t index = 0) {
171     switch (AddressingModeField::decode(instr_->opcode())) {
172       case kMode_None:
173       case kMode_Operand2_R_LSR_I:
174       case kMode_Operand2_R_ASR_I:
175       case kMode_Operand2_R_ROR_I:
176       case kMode_Operand2_R_UXTB:
177       case kMode_Operand2_R_UXTH:
178       case kMode_Operand2_R_SXTB:
179       case kMode_Operand2_R_SXTH:
180       case kMode_Operand2_R_SXTW:
181         break;
182       case kMode_Root:
183         return MemOperand(kRootRegister, InputInt64(index));
184       case kMode_Operand2_R_LSL_I:
185         return MemOperand(InputRegister(index + 0), InputRegister(index + 1),
186                           LSL, InputInt32(index + 2));
187       case kMode_MRI:
188         return MemOperand(InputRegister(index + 0), InputInt32(index + 1));
189       case kMode_MRR:
190         return MemOperand(InputRegister(index + 0), InputRegister(index + 1));
191     }
192     UNREACHABLE();
193   }
194 
195   Operand ToOperand(InstructionOperand* op) {
196     if (op->IsRegister()) {
197       return Operand(ToRegister(op));
198     }
199     return ToImmediate(op);
200   }
201 
202   Operand ToOperand32(InstructionOperand* op) {
203     if (op->IsRegister()) {
204       return Operand(ToRegister(op).W());
205     }
206     return ToImmediate(op);
207   }
208 
209   Operand ToImmediate(InstructionOperand* operand) {
210     Constant constant = ToConstant(operand);
211     switch (constant.type()) {
212       case Constant::kInt32:
213         return Operand(constant.ToInt32());
214       case Constant::kInt64:
215         if (RelocInfo::IsWasmReference(constant.rmode())) {
216           return Operand(constant.ToInt64(), constant.rmode());
217         } else {
218           return Operand(constant.ToInt64());
219         }
220       case Constant::kFloat32:
221         return Operand(Operand::EmbeddedNumber(constant.ToFloat32()));
222       case Constant::kFloat64:
223         return Operand(Operand::EmbeddedNumber(constant.ToFloat64().value()));
224       case Constant::kExternalReference:
225         return Operand(constant.ToExternalReference());
226       case Constant::kCompressedHeapObject:  // Fall through.
227       case Constant::kHeapObject:
228         return Operand(constant.ToHeapObject());
229       case Constant::kDelayedStringConstant:
230         return Operand::EmbeddedStringConstant(
231             constant.ToDelayedStringConstant());
232       case Constant::kRpoNumber:
233         UNREACHABLE();  // TODO(dcarney): RPO immediates on arm64.
234         break;
235     }
236     UNREACHABLE();
237   }
238 
239   MemOperand ToMemOperand(InstructionOperand* op, TurboAssembler* tasm) const {
240     DCHECK_NOT_NULL(op);
241     DCHECK(op->IsStackSlot() || op->IsFPStackSlot());
242     return SlotToMemOperand(AllocatedOperand::cast(op)->index(), tasm);
243   }
244 
245   MemOperand SlotToMemOperand(int slot, TurboAssembler* tasm) const {
246     FrameOffset offset = frame_access_state()->GetFrameOffset(slot);
247     if (offset.from_frame_pointer()) {
248       int from_sp = offset.offset() + frame_access_state()->GetSPToFPOffset();
249       // Convert FP-offsets to SP-offsets if it results in better code.
250       if (Assembler::IsImmLSUnscaled(from_sp) ||
251           Assembler::IsImmLSScaled(from_sp, 3)) {
252         offset = FrameOffset::FromStackPointer(from_sp);
253       }
254     }
255     return MemOperand(offset.from_stack_pointer() ? sp : fp, offset.offset());
256   }
257 };
258 
259 namespace {
260 
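// Out-of-line portion of the write barrier emitted by
// kArchStoreWithWriteBarrier; it is only reached when the inline page-flag
// check indicates that the barrier may be required.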
261 class OutOfLineRecordWrite final : public OutOfLineCode {
262  public:
263   OutOfLineRecordWrite(CodeGenerator* gen, Register object, Operand offset,
264                        Register value, RecordWriteMode mode,
265                        StubCallMode stub_mode,
266                        UnwindingInfoWriter* unwinding_info_writer)
267       : OutOfLineCode(gen),
268         object_(object),
269         offset_(offset),
270         value_(value),
271         mode_(mode),
272         stub_mode_(stub_mode),
273         must_save_lr_(!gen->frame_access_state()->has_frame()),
274         unwinding_info_writer_(unwinding_info_writer),
275         zone_(gen->zone()) {}
276 
277   void Generate() final {
278     if (mode_ > RecordWriteMode::kValueIsPointer) {
279       __ JumpIfSmi(value_, exit());
280     }
281     if (COMPRESS_POINTERS_BOOL) {
282       __ DecompressTaggedPointer(value_, value_);
283     }
284     __ CheckPageFlag(value_, MemoryChunk::kPointersToHereAreInterestingMask, ne,
285                      exit());
286     RememberedSetAction const remembered_set_action =
287         mode_ > RecordWriteMode::kValueIsMap ? EMIT_REMEMBERED_SET
288                                              : OMIT_REMEMBERED_SET;
289     SaveFPRegsMode const save_fp_mode =
290         frame()->DidAllocateDoubleRegisters() ? kSaveFPRegs : kDontSaveFPRegs;
291     if (must_save_lr_) {
292       // We need to save and restore lr if the frame was elided.
293       __ Push<TurboAssembler::kSignLR>(lr, padreg);
294       unwinding_info_writer_->MarkLinkRegisterOnTopOfStack(__ pc_offset(), sp);
295     }
296     if (mode_ == RecordWriteMode::kValueIsEphemeronKey) {
297       __ CallEphemeronKeyBarrier(object_, offset_, save_fp_mode);
298     } else if (stub_mode_ == StubCallMode::kCallWasmRuntimeStub) {
299       // A direct call to a wasm runtime stub defined in this module.
300       // Just encode the stub index. This will be patched when the code
301       // is added to the native module and copied into wasm code space.
302       __ CallRecordWriteStub(object_, offset_, remembered_set_action,
303                              save_fp_mode, wasm::WasmCode::kRecordWrite);
304     } else {
305       __ CallRecordWriteStub(object_, offset_, remembered_set_action,
306                              save_fp_mode);
307     }
308     if (must_save_lr_) {
309       __ Pop<TurboAssembler::kAuthLR>(padreg, lr);
310       unwinding_info_writer_->MarkPopLinkRegisterFromTopOfStack(__ pc_offset());
311     }
312   }
313 
314  private:
315   Register const object_;
316   Operand const offset_;
317   Register const value_;
318   RecordWriteMode const mode_;
319   StubCallMode const stub_mode_;
320   bool must_save_lr_;
321   UnwindingInfoWriter* const unwinding_info_writer_;
322   Zone* zone_;
323 };
324 
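// Maps a machine-independent FlagsCondition onto an ARM64 condition code.
// The floating-point mappings take into account how Fcmp sets the flags for
// unordered (NaN) operands.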
325 Condition FlagsConditionToCondition(FlagsCondition condition) {
326   switch (condition) {
327     case kEqual:
328       return eq;
329     case kNotEqual:
330       return ne;
331     case kSignedLessThan:
332       return lt;
333     case kSignedGreaterThanOrEqual:
334       return ge;
335     case kSignedLessThanOrEqual:
336       return le;
337     case kSignedGreaterThan:
338       return gt;
339     case kUnsignedLessThan:
340       return lo;
341     case kUnsignedGreaterThanOrEqual:
342       return hs;
343     case kUnsignedLessThanOrEqual:
344       return ls;
345     case kUnsignedGreaterThan:
346       return hi;
347     case kFloatLessThanOrUnordered:
348       return lt;
349     case kFloatGreaterThanOrEqual:
350       return ge;
351     case kFloatLessThanOrEqual:
352       return ls;
353     case kFloatGreaterThanOrUnordered:
354       return hi;
355     case kFloatLessThan:
356       return lo;
357     case kFloatGreaterThanOrEqualOrUnordered:
358       return hs;
359     case kFloatLessThanOrEqualOrUnordered:
360       return le;
361     case kFloatGreaterThan:
362       return gt;
363     case kOverflow:
364       return vs;
365     case kNotOverflow:
366       return vc;
367     case kUnorderedEqual:
368     case kUnorderedNotEqual:
369       break;
370     case kPositiveOrZero:
371       return pl;
372     case kNegative:
373       return mi;
374   }
375   UNREACHABLE();
376 }
377 
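// Masks a just-loaded value with the speculation poison register, so that the
// result of a mis-speculated load reads as zero.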
378 void EmitWordLoadPoisoningIfNeeded(CodeGenerator* codegen,
379                                    InstructionCode opcode, Instruction* instr,
380                                    Arm64OperandConverter const& i) {
381   const MemoryAccessMode access_mode =
382       static_cast<MemoryAccessMode>(MiscField::decode(opcode));
383   if (access_mode == kMemoryAccessPoisoned) {
384     Register value = i.OutputRegister();
385     Register poison = value.Is64Bits() ? kSpeculationPoisonRegister
386                                        : kSpeculationPoisonRegister.W();
387     codegen->tasm()->And(value, value, Operand(poison));
388   }
389 }
390 
391 void EmitMaybePoisonedFPLoad(CodeGenerator* codegen, InstructionCode opcode,
392                              Arm64OperandConverter* i, VRegister output_reg) {
393   const MemoryAccessMode access_mode =
394       static_cast<MemoryAccessMode>(MiscField::decode(opcode));
395   AddressingMode address_mode = AddressingModeField::decode(opcode);
396   if (access_mode == kMemoryAccessPoisoned && address_mode != kMode_Root) {
397     UseScratchRegisterScope temps(codegen->tasm());
398     Register address = temps.AcquireX();
399     switch (address_mode) {
400       case kMode_MRI:  // Fall through.
401       case kMode_MRR:
402         codegen->tasm()->Add(address, i->InputRegister(0), i->InputOperand(1));
403         break;
404       case kMode_Operand2_R_LSL_I:
405         codegen->tasm()->Add(address, i->InputRegister(0),
406                              i->InputOperand2_64(1));
407         break;
408       default:
409         // Note: we don't need poisoning for kMode_Root loads as those loads
410         // target a fixed offset from root register which is set once when
411         // initializing the vm.
412         UNREACHABLE();
413     }
414     codegen->tasm()->And(address, address, Operand(kSpeculationPoisonRegister));
415     codegen->tasm()->Ldr(output_reg, MemOperand(address));
416   } else {
417     codegen->tasm()->Ldr(output_reg, i->MemoryOperand());
418   }
419 }
420 
421 // Handles unary ops that work for float (scalar), double (scalar), or NEON.
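// For example, kArm64Float64RoundDown below is emitted as:
//   EmitFpOrNeonUnop(tasm(), &TurboAssembler::Frintm, instr, i, kFormatD,
//                    kFormat2D);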
422 template <typename Fn>
423 void EmitFpOrNeonUnop(TurboAssembler* tasm, Fn fn, Instruction* instr,
424                       Arm64OperandConverter i, VectorFormat scalar,
425                       VectorFormat vector) {
426   VectorFormat f = instr->InputAt(0)->IsSimd128Register() ? vector : scalar;
427 
428   VRegister output = VRegister::Create(i.OutputDoubleRegister().code(), f);
429   VRegister input = VRegister::Create(i.InputDoubleRegister(0).code(), f);
430   (tasm->*fn)(output, input);
431 }
432 
433 }  // namespace
434 
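// Assembles a shift by either a register or an immediate; immediate shift
// amounts are reduced modulo the operand width, matching the register-shift
// form, which only uses the low bits of the shift register.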
435 #define ASSEMBLE_SHIFT(asm_instr, width)                                    \
436   do {                                                                      \
437     if (instr->InputAt(1)->IsRegister()) {                                  \
438       __ asm_instr(i.OutputRegister##width(), i.InputRegister##width(0),    \
439                    i.InputRegister##width(1));                              \
440     } else {                                                                \
441       uint32_t imm =                                                        \
442           static_cast<uint32_t>(i.InputOperand##width(1).ImmediateValue()); \
443       __ asm_instr(i.OutputRegister##width(), i.InputRegister##width(0),    \
444                    imm % (width));                                          \
445     }                                                                       \
446   } while (0)
447 
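// The atomic macros below first materialize the effective address
// (base + index) in TempRegister(0) and then operate on that address.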
448 #define ASSEMBLE_ATOMIC_LOAD_INTEGER(asm_instr, reg)                   \
449   do {                                                                 \
450     __ Add(i.TempRegister(0), i.InputRegister(0), i.InputRegister(1)); \
451     __ asm_instr(i.Output##reg(), i.TempRegister(0));                  \
452   } while (0)
453 
454 #define ASSEMBLE_ATOMIC_STORE_INTEGER(asm_instr, reg)                  \
455   do {                                                                 \
456     __ Add(i.TempRegister(0), i.InputRegister(0), i.InputRegister(1)); \
457     __ asm_instr(i.Input##reg(2), i.TempRegister(0));                  \
458   } while (0)
459 
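// Atomic exchange as a load-/store-exclusive retry loop: the loop repeats
// until the store-exclusive status written to TempRegister32(1) is zero
// (success).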
460 #define ASSEMBLE_ATOMIC_EXCHANGE_INTEGER(load_instr, store_instr, reg)       \
461   do {                                                                       \
462     Label exchange;                                                          \
463     __ Add(i.TempRegister(0), i.InputRegister(0), i.InputRegister(1));       \
464     __ Bind(&exchange);                                                      \
465     __ load_instr(i.Output##reg(), i.TempRegister(0));                       \
466     __ store_instr(i.TempRegister32(1), i.Input##reg(2), i.TempRegister(0)); \
467     __ Cbnz(i.TempRegister32(1), &exchange);                                 \
468   } while (0)
469 
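// Atomic compare-exchange: load the current value, exit early if it does not
// equal the expected value (input 2, extended via "ext"), otherwise attempt
// the store and retry while the exclusive store fails.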
470 #define ASSEMBLE_ATOMIC_COMPARE_EXCHANGE_INTEGER(load_instr, store_instr, ext, \
471                                                  reg)                          \
472   do {                                                                         \
473     Label compareExchange;                                                     \
474     Label exit;                                                                \
475     __ Add(i.TempRegister(0), i.InputRegister(0), i.InputRegister(1));         \
476     __ Bind(&compareExchange);                                                 \
477     __ load_instr(i.Output##reg(), i.TempRegister(0));                         \
478     __ Cmp(i.Output##reg(), Operand(i.Input##reg(2), ext));                    \
479     __ B(ne, &exit);                                                           \
480     __ store_instr(i.TempRegister32(1), i.Input##reg(3), i.TempRegister(0));   \
481     __ Cbnz(i.TempRegister32(1), &compareExchange);                            \
482     __ Bind(&exit);                                                            \
483   } while (0)
484 
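// Atomic read-modify-write loop: the loaded old value is the result, the
// combined value is computed into Temp(1), and the store is retried until the
// exclusive store succeeds.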
485 #define ASSEMBLE_ATOMIC_BINOP(load_instr, store_instr, bin_instr, reg)       \
486   do {                                                                       \
487     Label binop;                                                             \
488     __ Add(i.TempRegister(0), i.InputRegister(0), i.InputRegister(1));       \
489     __ Bind(&binop);                                                         \
490     __ load_instr(i.Output##reg(), i.TempRegister(0));                       \
491     __ bin_instr(i.Temp##reg(1), i.Output##reg(), Operand(i.Input##reg(2))); \
492     __ store_instr(i.TempRegister32(2), i.Temp##reg(1), i.TempRegister(0));  \
493     __ Cbnz(i.TempRegister32(2), &binop);                                    \
494   } while (0)
495 
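// The IEEE754 helpers call out to the C implementations of the math
// functions; the calls pass zero general-purpose and two (binop) or one
// (unop) double arguments in FP registers.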
496 #define ASSEMBLE_IEEE754_BINOP(name)                                        \
497   do {                                                                      \
498     FrameScope scope(tasm(), StackFrame::MANUAL);                           \
499     __ CallCFunction(ExternalReference::ieee754_##name##_function(), 0, 2); \
500   } while (0)
501 
502 #define ASSEMBLE_IEEE754_UNOP(name)                                         \
503   do {                                                                      \
504     FrameScope scope(tasm(), StackFrame::MANUAL);                           \
505     __ CallCFunction(ExternalReference::ieee754_##name##_function(), 0, 1); \
506   } while (0)
507 
508 // If shift value is an immediate, we can call asm_imm, taking the shift value
509 // modulo 2^width. Otherwise, emit code to perform the modulus operation, and
510 // call asm_shl.
511 #define ASSEMBLE_SIMD_SHIFT_LEFT(asm_imm, width, format, asm_shl, gp)       \
512   do {                                                                      \
513     if (instr->InputAt(1)->IsImmediate()) {                                 \
514       __ asm_imm(i.OutputSimd128Register().format(),                        \
515                  i.InputSimd128Register(0).format(), i.InputInt##width(1)); \
516     } else {                                                                \
517       UseScratchRegisterScope temps(tasm());                                \
518       VRegister tmp = temps.AcquireQ();                                     \
519       Register shift = temps.Acquire##gp();                                 \
520       constexpr int mask = (1 << width) - 1;                                \
521       __ And(shift, i.InputRegister32(1), mask);                            \
522       __ Dup(tmp.format(), shift);                                          \
523       __ asm_shl(i.OutputSimd128Register().format(),                        \
524                  i.InputSimd128Register(0).format(), tmp.format());         \
525     }                                                                       \
526   } while (0)
527 
528 // If shift value is an immediate, we can call asm_imm, taking the shift value
529 // modulo 2^width. Otherwise, emit code to perform the modulus operation, and
530 // call asm_shl, passing in the negative shift value (treated as right shift).
531 #define ASSEMBLE_SIMD_SHIFT_RIGHT(asm_imm, width, format, asm_shl, gp)      \
532   do {                                                                      \
533     if (instr->InputAt(1)->IsImmediate()) {                                 \
534       __ asm_imm(i.OutputSimd128Register().format(),                        \
535                  i.InputSimd128Register(0).format(), i.InputInt##width(1)); \
536     } else {                                                                \
537       UseScratchRegisterScope temps(tasm());                                \
538       VRegister tmp = temps.AcquireQ();                                     \
539       Register shift = temps.Acquire##gp();                                 \
540       constexpr int mask = (1 << width) - 1;                                \
541       __ And(shift, i.InputRegister32(1), mask);                            \
542       __ Dup(tmp.format(), shift);                                          \
543       __ Neg(tmp.format(), tmp.format());                                   \
544       __ asm_shl(i.OutputSimd128Register().format(),                        \
545                  i.InputSimd128Register(0).format(), tmp.format());         \
546     }                                                                       \
547   } while (0)
548 
549 void CodeGenerator::AssembleDeconstructFrame() {
550   __ Mov(sp, fp);
551   __ Pop<TurboAssembler::kAuthLR>(fp, lr);
552 
553   unwinding_info_writer_.MarkFrameDeconstructed(__ pc_offset());
554 }
555 
556 void CodeGenerator::AssemblePrepareTailCall() {
557   if (frame_access_state()->has_frame()) {
558     __ RestoreFPAndLR();
559   }
560   frame_access_state()->SetFrameAccessToSP();
561 }
562 
563 void CodeGenerator::AssemblePopArgumentsAdaptorFrame(Register args_reg,
564                                                      Register scratch1,
565                                                      Register scratch2,
566                                                      Register scratch3) {
567   DCHECK(!AreAliased(args_reg, scratch1, scratch2, scratch3));
568   Label done;
569 
570   // Check if current frame is an arguments adaptor frame.
571   __ Ldr(scratch1, MemOperand(fp, StandardFrameConstants::kContextOffset));
572   __ Cmp(scratch1,
573          Operand(StackFrame::TypeToMarker(StackFrame::ARGUMENTS_ADAPTOR)));
574   __ B(ne, &done);
575 
576   // Load arguments count from current arguments adaptor frame (note, it
577   // does not include receiver).
578   Register caller_args_count_reg = scratch1;
579   __ Ldr(caller_args_count_reg,
580          MemOperand(fp, ArgumentsAdaptorFrameConstants::kLengthOffset));
581   __ SmiUntag(caller_args_count_reg);
582 
583   __ PrepareForTailCall(args_reg, caller_args_count_reg, scratch2, scratch3);
584   __ bind(&done);
585 }
586 
587 namespace {
588 
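// Adjusts sp so that, at the tail call, the first unused stack slot sits at
// the expected offset; the delta is kept even so sp stays 16-byte aligned.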
589 void AdjustStackPointerForTailCall(TurboAssembler* tasm,
590                                    FrameAccessState* state,
591                                    int new_slot_above_sp,
592                                    bool allow_shrinkage = true) {
593   int current_sp_offset = state->GetSPToFPSlotCount() +
594                           StandardFrameConstants::kFixedSlotCountAboveFp;
595   int stack_slot_delta = new_slot_above_sp - current_sp_offset;
596   DCHECK_EQ(stack_slot_delta % 2, 0);
597   if (stack_slot_delta > 0) {
598     tasm->Claim(stack_slot_delta);
599     state->IncreaseSPDelta(stack_slot_delta);
600   } else if (allow_shrinkage && stack_slot_delta < 0) {
601     tasm->Drop(-stack_slot_delta);
602     state->IncreaseSPDelta(stack_slot_delta);
603   }
604 }
605 
606 }  // namespace
607 
608 void CodeGenerator::AssembleTailCallBeforeGap(Instruction* instr,
609                                               int first_unused_stack_slot) {
610   AdjustStackPointerForTailCall(tasm(), frame_access_state(),
611                                 first_unused_stack_slot, false);
612 }
613 
614 void CodeGenerator::AssembleTailCallAfterGap(Instruction* instr,
615                                              int first_unused_stack_slot) {
616   DCHECK_EQ(first_unused_stack_slot % 2, 0);
617   AdjustStackPointerForTailCall(tasm(), frame_access_state(),
618                                 first_unused_stack_slot);
619   DCHECK(instr->IsTailCall());
620   InstructionOperandConverter g(this, instr);
621   int optional_padding_slot = g.InputInt32(instr->InputCount() - 2);
622   if (optional_padding_slot % 2) {
623     __ Poke(padreg, optional_padding_slot * kSystemPointerSize);
624   }
625 }
626 
627 // Check that {kJavaScriptCallCodeStartRegister} is correct.
628 void CodeGenerator::AssembleCodeStartRegisterCheck() {
629   UseScratchRegisterScope temps(tasm());
630   Register scratch = temps.AcquireX();
631   __ ComputeCodeStartAddress(scratch);
632   __ cmp(scratch, kJavaScriptCallCodeStartRegister);
633   __ Assert(eq, AbortReason::kWrongFunctionCodeStart);
634 }
635 
636 // Check if the code object is marked for deoptimization. If it is, then it
637 // jumps to the CompileLazyDeoptimizedCode builtin. In order to do this we need
638 // to:
639 //    1. read from memory the word that contains that bit, which can be found in
640 //       the flags in the referenced {CodeDataContainer} object;
641 //    2. test kMarkedForDeoptimizationBit in those flags; and
642 //    3. if it is not zero then it jumps to the builtin.
643 void CodeGenerator::BailoutIfDeoptimized() {
644   UseScratchRegisterScope temps(tasm());
645   Register scratch = temps.AcquireX();
646   int offset = Code::kCodeDataContainerOffset - Code::kHeaderSize;
647   __ LoadTaggedPointerField(
648       scratch, MemOperand(kJavaScriptCallCodeStartRegister, offset));
649   __ Ldr(scratch.W(),
650          FieldMemOperand(scratch, CodeDataContainer::kKindSpecificFlagsOffset));
651   Label not_deoptimized;
652   __ Tbz(scratch.W(), Code::kMarkedForDeoptimizationBit, &not_deoptimized);
653   __ Jump(BUILTIN_CODE(isolate(), CompileLazyDeoptimizedCode),
654           RelocInfo::CODE_TARGET);
655   __ Bind(&not_deoptimized);
656 }
657 
658 void CodeGenerator::GenerateSpeculationPoisonFromCodeStartRegister() {
659   UseScratchRegisterScope temps(tasm());
660   Register scratch = temps.AcquireX();
661 
662   // Set a mask which has all bits set in the normal case, but has all
663   // bits cleared if we are speculatively executing the wrong PC.
664   __ ComputeCodeStartAddress(scratch);
665   __ Cmp(kJavaScriptCallCodeStartRegister, scratch);
666   __ Csetm(kSpeculationPoisonRegister, eq);
667   __ Csdb();
668 }
669 
670 void CodeGenerator::AssembleRegisterArgumentPoisoning() {
671   UseScratchRegisterScope temps(tasm());
672   Register scratch = temps.AcquireX();
673 
674   __ Mov(scratch, sp);
675   __ And(kJSFunctionRegister, kJSFunctionRegister, kSpeculationPoisonRegister);
676   __ And(kContextRegister, kContextRegister, kSpeculationPoisonRegister);
677   __ And(scratch, scratch, kSpeculationPoisonRegister);
678   __ Mov(sp, scratch);
679 }
680 
681 // Assembles an instruction after register allocation, producing machine code.
682 CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
683     Instruction* instr) {
684   Arm64OperandConverter i(this, instr);
685   InstructionCode opcode = instr->opcode();
686   ArchOpcode arch_opcode = ArchOpcodeField::decode(opcode);
687   switch (arch_opcode) {
688     case kArchCallCodeObject: {
689       if (instr->InputAt(0)->IsImmediate()) {
690         __ Call(i.InputCode(0), RelocInfo::CODE_TARGET);
691       } else {
692         Register reg = i.InputRegister(0);
693         DCHECK_IMPLIES(
694             instr->HasCallDescriptorFlag(CallDescriptor::kFixedTargetRegister),
695             reg == kJavaScriptCallCodeStartRegister);
696         __ CallCodeObject(reg);
697       }
698       RecordCallPosition(instr);
699       frame_access_state()->ClearSPDelta();
700       break;
701     }
702     case kArchCallBuiltinPointer: {
703       DCHECK(!instr->InputAt(0)->IsImmediate());
704       Register builtin_index = i.InputRegister(0);
705       __ CallBuiltinByIndex(builtin_index);
706       RecordCallPosition(instr);
707       frame_access_state()->ClearSPDelta();
708       break;
709     }
710     case kArchCallWasmFunction: {
711       if (instr->InputAt(0)->IsImmediate()) {
712         Constant constant = i.ToConstant(instr->InputAt(0));
713         Address wasm_code = static_cast<Address>(constant.ToInt64());
714         __ Call(wasm_code, constant.rmode());
715       } else {
716         Register target = i.InputRegister(0);
717         __ Call(target);
718       }
719       RecordCallPosition(instr);
720       frame_access_state()->ClearSPDelta();
721       break;
722     }
723     case kArchTailCallCodeObjectFromJSFunction:
724     case kArchTailCallCodeObject: {
725       if (arch_opcode == kArchTailCallCodeObjectFromJSFunction) {
726         AssemblePopArgumentsAdaptorFrame(kJavaScriptCallArgCountRegister,
727                                          i.TempRegister(0), i.TempRegister(1),
728                                          i.TempRegister(2));
729       }
730       if (instr->InputAt(0)->IsImmediate()) {
731         __ Jump(i.InputCode(0), RelocInfo::CODE_TARGET);
732       } else {
733         Register reg = i.InputRegister(0);
734         DCHECK_IMPLIES(
735             instr->HasCallDescriptorFlag(CallDescriptor::kFixedTargetRegister),
736             reg == kJavaScriptCallCodeStartRegister);
737         __ JumpCodeObject(reg);
738       }
739       unwinding_info_writer_.MarkBlockWillExit();
740       frame_access_state()->ClearSPDelta();
741       frame_access_state()->SetFrameAccessToDefault();
742       break;
743     }
744     case kArchTailCallWasm: {
745       if (instr->InputAt(0)->IsImmediate()) {
746         Constant constant = i.ToConstant(instr->InputAt(0));
747         Address wasm_code = static_cast<Address>(constant.ToInt64());
748         __ Jump(wasm_code, constant.rmode());
749       } else {
750         Register target = i.InputRegister(0);
751         UseScratchRegisterScope temps(tasm());
752         temps.Exclude(x17);
753         __ Mov(x17, target);
754         __ Jump(x17);
755       }
756       unwinding_info_writer_.MarkBlockWillExit();
757       frame_access_state()->ClearSPDelta();
758       frame_access_state()->SetFrameAccessToDefault();
759       break;
760     }
761     case kArchTailCallAddress: {
762       CHECK(!instr->InputAt(0)->IsImmediate());
763       Register reg = i.InputRegister(0);
764       DCHECK_IMPLIES(
765           instr->HasCallDescriptorFlag(CallDescriptor::kFixedTargetRegister),
766           reg == kJavaScriptCallCodeStartRegister);
767       UseScratchRegisterScope temps(tasm());
768       temps.Exclude(x17);
769       __ Mov(x17, reg);
770       __ Jump(x17);
771       unwinding_info_writer_.MarkBlockWillExit();
772       frame_access_state()->ClearSPDelta();
773       frame_access_state()->SetFrameAccessToDefault();
774       break;
775     }
776     case kArchCallJSFunction: {
777       Register func = i.InputRegister(0);
778       if (FLAG_debug_code) {
779         // Check the function's context matches the context argument.
780         UseScratchRegisterScope scope(tasm());
781         Register temp = scope.AcquireX();
782         __ LoadTaggedPointerField(
783             temp, FieldMemOperand(func, JSFunction::kContextOffset));
784         __ cmp(cp, temp);
785         __ Assert(eq, AbortReason::kWrongFunctionContext);
786       }
787       static_assert(kJavaScriptCallCodeStartRegister == x2, "ABI mismatch");
788       __ LoadTaggedPointerField(x2,
789                                 FieldMemOperand(func, JSFunction::kCodeOffset));
790       __ CallCodeObject(x2);
791       RecordCallPosition(instr);
792       frame_access_state()->ClearSPDelta();
793       break;
794     }
795     case kArchPrepareCallCFunction:
796       // We don't need kArchPrepareCallCFunction on arm64 as the instruction
797       // selector has already performed a Claim to reserve space on the stack.
798       // Frame alignment is always 16 bytes, and the stack pointer is already
799       // 16-byte aligned, therefore we do not need to align the stack pointer
800       // by an unknown value, and it is safe to continue accessing the frame
801       // via the stack pointer.
802       UNREACHABLE();
803     case kArchSaveCallerRegisters: {
804       fp_mode_ =
805           static_cast<SaveFPRegsMode>(MiscField::decode(instr->opcode()));
806       DCHECK(fp_mode_ == kDontSaveFPRegs || fp_mode_ == kSaveFPRegs);
807       // kReturnRegister0 should have been saved before entering the stub.
808       int bytes = __ PushCallerSaved(fp_mode_, kReturnRegister0);
809       DCHECK(IsAligned(bytes, kSystemPointerSize));
810       DCHECK_EQ(0, frame_access_state()->sp_delta());
811       frame_access_state()->IncreaseSPDelta(bytes / kSystemPointerSize);
812       DCHECK(!caller_registers_saved_);
813       caller_registers_saved_ = true;
814       break;
815     }
816     case kArchRestoreCallerRegisters: {
817       DCHECK(fp_mode_ ==
818              static_cast<SaveFPRegsMode>(MiscField::decode(instr->opcode())));
819       DCHECK(fp_mode_ == kDontSaveFPRegs || fp_mode_ == kSaveFPRegs);
820       // Don't overwrite the returned value.
821       int bytes = __ PopCallerSaved(fp_mode_, kReturnRegister0);
822       frame_access_state()->IncreaseSPDelta(-(bytes / kSystemPointerSize));
823       DCHECK_EQ(0, frame_access_state()->sp_delta());
824       DCHECK(caller_registers_saved_);
825       caller_registers_saved_ = false;
826       break;
827     }
828     case kArchPrepareTailCall:
829       AssemblePrepareTailCall();
830       break;
831     case kArchCallCFunction: {
832       int const num_parameters = MiscField::decode(instr->opcode());
833       Label return_location;
834       if (linkage()->GetIncomingDescriptor()->IsWasmCapiFunction()) {
835         // Put the return address in a stack slot.
836         __ StoreReturnAddressInWasmExitFrame(&return_location);
837       }
838 
839       if (instr->InputAt(0)->IsImmediate()) {
840         ExternalReference ref = i.InputExternalReference(0);
841         __ CallCFunction(ref, num_parameters, 0);
842       } else {
843         Register func = i.InputRegister(0);
844         __ CallCFunction(func, num_parameters, 0);
845       }
846       __ Bind(&return_location);
847       if (linkage()->GetIncomingDescriptor()->IsWasmCapiFunction()) {
848         RecordSafepoint(instr->reference_map(), Safepoint::kNoLazyDeopt);
849       }
850       frame_access_state()->SetFrameAccessToDefault();
851       // Ideally, we should decrement SP delta to match the change of stack
852       // pointer in CallCFunction. However, for certain architectures (e.g.
853       // ARM), there may be stricter alignment requirements, causing the old SP
854       // to be saved on the stack. In those cases, we cannot calculate the SP
855       // delta statically.
856       frame_access_state()->ClearSPDelta();
857       if (caller_registers_saved_) {
858         // Need to re-sync SP delta introduced in kArchSaveCallerRegisters.
859         // Here, we assume the sequence to be:
860         //   kArchSaveCallerRegisters;
861         //   kArchCallCFunction;
862         //   kArchRestoreCallerRegisters;
863         int bytes =
864             __ RequiredStackSizeForCallerSaved(fp_mode_, kReturnRegister0);
865         frame_access_state()->IncreaseSPDelta(bytes / kSystemPointerSize);
866       }
867       break;
868     }
869     case kArchJmp:
870       AssembleArchJump(i.InputRpo(0));
871       break;
872     case kArchTableSwitch:
873       AssembleArchTableSwitch(instr);
874       break;
875     case kArchBinarySearchSwitch:
876       AssembleArchBinarySearchSwitch(instr);
877       break;
878     case kArchAbortCSAAssert:
879       DCHECK_EQ(i.InputRegister(0), x1);
880       {
881         // We don't actually want to generate a pile of code for this, so just
882         // claim there is a stack frame, without generating one.
883         FrameScope scope(tasm(), StackFrame::NONE);
884         __ Call(
885             isolate()->builtins()->builtin_handle(Builtins::kAbortCSAAssert),
886             RelocInfo::CODE_TARGET);
887       }
888       __ Debug("kArchAbortCSAAssert", 0, BREAK);
889       unwinding_info_writer_.MarkBlockWillExit();
890       break;
891     case kArchDebugBreak:
892       __ DebugBreak();
893       break;
894     case kArchComment:
895       __ RecordComment(reinterpret_cast<const char*>(i.InputInt64(0)));
896       break;
897     case kArchThrowTerminator:
898       unwinding_info_writer_.MarkBlockWillExit();
899       break;
900     case kArchNop:
901       // don't emit code for nops.
902       break;
903     case kArchDeoptimize: {
904       DeoptimizationExit* exit =
905           BuildTranslation(instr, -1, 0, OutputFrameStateCombine::Ignore());
906       __ B(exit->label());
907       break;
908     }
909     case kArchRet:
910       AssembleReturn(instr->InputAt(0));
911       break;
912     case kArchFramePointer:
913       __ mov(i.OutputRegister(), fp);
914       break;
915     case kArchParentFramePointer:
916       if (frame_access_state()->has_frame()) {
917         __ ldr(i.OutputRegister(), MemOperand(fp, 0));
918       } else {
919         __ mov(i.OutputRegister(), fp);
920       }
921       break;
922     case kArchStackPointerGreaterThan: {
923       // Potentially apply an offset to the current stack pointer before the
924       // comparison to consider the size difference of an optimized frame versus
925       // the contained unoptimized frames.
926 
927       Register lhs_register = sp;
928       uint32_t offset;
929 
930       if (ShouldApplyOffsetToStackCheck(instr, &offset)) {
931         lhs_register = i.TempRegister(0);
932         __ Sub(lhs_register, sp, offset);
933       }
934 
935       constexpr size_t kValueIndex = 0;
936       DCHECK(instr->InputAt(kValueIndex)->IsRegister());
937       __ Cmp(lhs_register, i.InputRegister(kValueIndex));
938       break;
939     }
940     case kArchStackCheckOffset:
941       __ Move(i.OutputRegister(), Smi::FromInt(GetStackCheckOffset()));
942       break;
943     case kArchTruncateDoubleToI:
944       __ TruncateDoubleToI(isolate(), zone(), i.OutputRegister(),
945                            i.InputDoubleRegister(0), DetermineStubCallMode(),
946                            frame_access_state()->has_frame()
947                                ? kLRHasBeenSaved
948                                : kLRHasNotBeenSaved);
949 
950       break;
951     case kArchStoreWithWriteBarrier: {
952       RecordWriteMode mode =
953           static_cast<RecordWriteMode>(MiscField::decode(instr->opcode()));
954       AddressingMode addressing_mode =
955           AddressingModeField::decode(instr->opcode());
956       Register object = i.InputRegister(0);
957       Operand offset(0);
958       if (addressing_mode == kMode_MRI) {
959         offset = Operand(i.InputInt64(1));
960       } else {
961         DCHECK_EQ(addressing_mode, kMode_MRR);
962         offset = Operand(i.InputRegister(1));
963       }
964       Register value = i.InputRegister(2);
965       auto ool = zone()->New<OutOfLineRecordWrite>(
966           this, object, offset, value, mode, DetermineStubCallMode(),
967           &unwinding_info_writer_);
968       __ StoreTaggedField(value, MemOperand(object, offset));
969       __ CheckPageFlag(object, MemoryChunk::kPointersFromHereAreInterestingMask,
970                        eq, ool->entry());
971       __ Bind(ool->exit());
972       break;
973     }
974     case kArchStackSlot: {
975       FrameOffset offset =
976           frame_access_state()->GetFrameOffset(i.InputInt32(0));
977       Register base = offset.from_stack_pointer() ? sp : fp;
978       __ Add(i.OutputRegister(0), base, Operand(offset.offset()));
979       break;
980     }
981     case kIeee754Float64Acos:
982       ASSEMBLE_IEEE754_UNOP(acos);
983       break;
984     case kIeee754Float64Acosh:
985       ASSEMBLE_IEEE754_UNOP(acosh);
986       break;
987     case kIeee754Float64Asin:
988       ASSEMBLE_IEEE754_UNOP(asin);
989       break;
990     case kIeee754Float64Asinh:
991       ASSEMBLE_IEEE754_UNOP(asinh);
992       break;
993     case kIeee754Float64Atan:
994       ASSEMBLE_IEEE754_UNOP(atan);
995       break;
996     case kIeee754Float64Atanh:
997       ASSEMBLE_IEEE754_UNOP(atanh);
998       break;
999     case kIeee754Float64Atan2:
1000       ASSEMBLE_IEEE754_BINOP(atan2);
1001       break;
1002     case kIeee754Float64Cos:
1003       ASSEMBLE_IEEE754_UNOP(cos);
1004       break;
1005     case kIeee754Float64Cosh:
1006       ASSEMBLE_IEEE754_UNOP(cosh);
1007       break;
1008     case kIeee754Float64Cbrt:
1009       ASSEMBLE_IEEE754_UNOP(cbrt);
1010       break;
1011     case kIeee754Float64Exp:
1012       ASSEMBLE_IEEE754_UNOP(exp);
1013       break;
1014     case kIeee754Float64Expm1:
1015       ASSEMBLE_IEEE754_UNOP(expm1);
1016       break;
1017     case kIeee754Float64Log:
1018       ASSEMBLE_IEEE754_UNOP(log);
1019       break;
1020     case kIeee754Float64Log1p:
1021       ASSEMBLE_IEEE754_UNOP(log1p);
1022       break;
1023     case kIeee754Float64Log2:
1024       ASSEMBLE_IEEE754_UNOP(log2);
1025       break;
1026     case kIeee754Float64Log10:
1027       ASSEMBLE_IEEE754_UNOP(log10);
1028       break;
1029     case kIeee754Float64Pow:
1030       ASSEMBLE_IEEE754_BINOP(pow);
1031       break;
1032     case kIeee754Float64Sin:
1033       ASSEMBLE_IEEE754_UNOP(sin);
1034       break;
1035     case kIeee754Float64Sinh:
1036       ASSEMBLE_IEEE754_UNOP(sinh);
1037       break;
1038     case kIeee754Float64Tan:
1039       ASSEMBLE_IEEE754_UNOP(tan);
1040       break;
1041     case kIeee754Float64Tanh:
1042       ASSEMBLE_IEEE754_UNOP(tanh);
1043       break;
1044     case kArm64Float32RoundDown:
1045       EmitFpOrNeonUnop(tasm(), &TurboAssembler::Frintm, instr, i, kFormatS,
1046                        kFormat4S);
1047       break;
1048     case kArm64Float64RoundDown:
1049       EmitFpOrNeonUnop(tasm(), &TurboAssembler::Frintm, instr, i, kFormatD,
1050                        kFormat2D);
1051       break;
1052     case kArm64Float32RoundUp:
1053       EmitFpOrNeonUnop(tasm(), &TurboAssembler::Frintp, instr, i, kFormatS,
1054                        kFormat4S);
1055       break;
1056     case kArm64Float64RoundUp:
1057       EmitFpOrNeonUnop(tasm(), &TurboAssembler::Frintp, instr, i, kFormatD,
1058                        kFormat2D);
1059       break;
1060     case kArm64Float64RoundTiesAway:
1061       EmitFpOrNeonUnop(tasm(), &TurboAssembler::Frinta, instr, i, kFormatD,
1062                        kFormat2D);
1063       break;
1064     case kArm64Float32RoundTruncate:
1065       EmitFpOrNeonUnop(tasm(), &TurboAssembler::Frintz, instr, i, kFormatS,
1066                        kFormat4S);
1067       break;
1068     case kArm64Float64RoundTruncate:
1069       EmitFpOrNeonUnop(tasm(), &TurboAssembler::Frintz, instr, i, kFormatD,
1070                        kFormat2D);
1071       break;
1072     case kArm64Float32RoundTiesEven:
1073       EmitFpOrNeonUnop(tasm(), &TurboAssembler::Frintn, instr, i, kFormatS,
1074                        kFormat4S);
1075       break;
1076     case kArm64Float64RoundTiesEven:
1077       EmitFpOrNeonUnop(tasm(), &TurboAssembler::Frintn, instr, i, kFormatD,
1078                        kFormat2D);
1079       break;
1080     case kArm64Add:
1081       if (FlagsModeField::decode(opcode) != kFlags_none) {
1082         __ Adds(i.OutputRegister(), i.InputOrZeroRegister64(0),
1083                 i.InputOperand2_64(1));
1084       } else {
1085         __ Add(i.OutputRegister(), i.InputOrZeroRegister64(0),
1086                i.InputOperand2_64(1));
1087       }
1088       break;
1089     case kArm64Add32:
1090       if (FlagsModeField::decode(opcode) != kFlags_none) {
1091         __ Adds(i.OutputRegister32(), i.InputOrZeroRegister32(0),
1092                 i.InputOperand2_32(1));
1093       } else {
1094         __ Add(i.OutputRegister32(), i.InputOrZeroRegister32(0),
1095                i.InputOperand2_32(1));
1096       }
1097       break;
1098     case kArm64And:
1099       if (FlagsModeField::decode(opcode) != kFlags_none) {
1100         // The ands instruction only sets N and Z, so only the following
1101         // conditions make sense.
1102         DCHECK(FlagsConditionField::decode(opcode) == kEqual ||
1103                FlagsConditionField::decode(opcode) == kNotEqual ||
1104                FlagsConditionField::decode(opcode) == kPositiveOrZero ||
1105                FlagsConditionField::decode(opcode) == kNegative);
1106         __ Ands(i.OutputRegister(), i.InputOrZeroRegister64(0),
1107                 i.InputOperand2_64(1));
1108       } else {
1109         __ And(i.OutputRegister(), i.InputOrZeroRegister64(0),
1110                i.InputOperand2_64(1));
1111       }
1112       break;
1113     case kArm64And32:
1114       if (FlagsModeField::decode(opcode) != kFlags_none) {
1115         // The ands instruction only sets N and Z, so only the following
1116         // conditions make sense.
1117         DCHECK(FlagsConditionField::decode(opcode) == kEqual ||
1118                FlagsConditionField::decode(opcode) == kNotEqual ||
1119                FlagsConditionField::decode(opcode) == kPositiveOrZero ||
1120                FlagsConditionField::decode(opcode) == kNegative);
1121         __ Ands(i.OutputRegister32(), i.InputOrZeroRegister32(0),
1122                 i.InputOperand2_32(1));
1123       } else {
1124         __ And(i.OutputRegister32(), i.InputOrZeroRegister32(0),
1125                i.InputOperand2_32(1));
1126       }
1127       break;
1128     case kArm64Bic:
1129       __ Bic(i.OutputRegister(), i.InputOrZeroRegister64(0),
1130              i.InputOperand2_64(1));
1131       break;
1132     case kArm64Bic32:
1133       __ Bic(i.OutputRegister32(), i.InputOrZeroRegister32(0),
1134              i.InputOperand2_32(1));
1135       break;
1136     case kArm64Mul:
1137       __ Mul(i.OutputRegister(), i.InputRegister(0), i.InputRegister(1));
1138       break;
1139     case kArm64Mul32:
1140       __ Mul(i.OutputRegister32(), i.InputRegister32(0), i.InputRegister32(1));
1141       break;
1142     case kArm64Saddlp: {
1143       VectorFormat dst_f = VectorFormatFillQ(MiscField::decode(opcode));
1144       VectorFormat src_f = VectorFormatHalfWidthDoubleLanes(dst_f);
1145       __ Saddlp(i.OutputSimd128Register().Format(dst_f),
1146                 i.InputSimd128Register(0).Format(src_f));
1147       break;
1148     }
1149     case kArm64Uaddlp: {
1150       VectorFormat dst_f = VectorFormatFillQ(MiscField::decode(opcode));
1151       VectorFormat src_f = VectorFormatHalfWidthDoubleLanes(dst_f);
1152       __ Uaddlp(i.OutputSimd128Register().Format(dst_f),
1153                 i.InputSimd128Register(0).Format(src_f));
1154       break;
1155     }
1156     case kArm64Smull: {
1157       if (instr->InputAt(0)->IsRegister()) {
1158         __ Smull(i.OutputRegister(), i.InputRegister32(0),
1159                  i.InputRegister32(1));
1160       } else {
1161         DCHECK(instr->InputAt(0)->IsSimd128Register());
1162         VectorFormat dst_f = VectorFormatFillQ(MiscField::decode(opcode));
1163         VectorFormat src_f = VectorFormatHalfWidth(dst_f);
1164         __ Smull(i.OutputSimd128Register().Format(dst_f),
1165                  i.InputSimd128Register(0).Format(src_f),
1166                  i.InputSimd128Register(1).Format(src_f));
1167       }
1168       break;
1169     }
1170     case kArm64Smull2: {
1171       VectorFormat dst_f = VectorFormatFillQ(MiscField::decode(opcode));
1172       VectorFormat src_f = VectorFormatHalfWidthDoubleLanes(dst_f);
1173       __ Smull2(i.OutputSimd128Register().Format(dst_f),
1174                 i.InputSimd128Register(0).Format(src_f),
1175                 i.InputSimd128Register(1).Format(src_f));
1176       break;
1177     }
1178     case kArm64Umull: {
1179       if (instr->InputAt(0)->IsRegister()) {
1180         __ Umull(i.OutputRegister(), i.InputRegister32(0),
1181                  i.InputRegister32(1));
1182       } else {
1183         DCHECK(instr->InputAt(0)->IsSimd128Register());
1184         VectorFormat dst_f = VectorFormatFillQ(MiscField::decode(opcode));
1185         VectorFormat src_f = VectorFormatHalfWidth(dst_f);
1186         __ Umull(i.OutputSimd128Register().Format(dst_f),
1187                  i.InputSimd128Register(0).Format(src_f),
1188                  i.InputSimd128Register(1).Format(src_f));
1189       }
1190       break;
1191     }
1192     case kArm64Umull2: {
1193       VectorFormat dst_f = VectorFormatFillQ(MiscField::decode(opcode));
1194       VectorFormat src_f = VectorFormatHalfWidthDoubleLanes(dst_f);
1195       __ Umull2(i.OutputSimd128Register().Format(dst_f),
1196                 i.InputSimd128Register(0).Format(src_f),
1197                 i.InputSimd128Register(1).Format(src_f));
1198       break;
1199     }
1200     case kArm64Madd:
1201       __ Madd(i.OutputRegister(), i.InputRegister(0), i.InputRegister(1),
1202               i.InputRegister(2));
1203       break;
1204     case kArm64Madd32:
1205       __ Madd(i.OutputRegister32(), i.InputRegister32(0), i.InputRegister32(1),
1206               i.InputRegister32(2));
1207       break;
1208     case kArm64Msub:
1209       __ Msub(i.OutputRegister(), i.InputRegister(0), i.InputRegister(1),
1210               i.InputRegister(2));
1211       break;
1212     case kArm64Msub32:
1213       __ Msub(i.OutputRegister32(), i.InputRegister32(0), i.InputRegister32(1),
1214               i.InputRegister32(2));
1215       break;
1216     case kArm64Mneg:
1217       __ Mneg(i.OutputRegister(), i.InputRegister(0), i.InputRegister(1));
1218       break;
1219     case kArm64Mneg32:
1220       __ Mneg(i.OutputRegister32(), i.InputRegister32(0), i.InputRegister32(1));
1221       break;
1222     case kArm64Idiv:
1223       __ Sdiv(i.OutputRegister(), i.InputRegister(0), i.InputRegister(1));
1224       break;
1225     case kArm64Idiv32:
1226       __ Sdiv(i.OutputRegister32(), i.InputRegister32(0), i.InputRegister32(1));
1227       break;
1228     case kArm64Udiv:
1229       __ Udiv(i.OutputRegister(), i.InputRegister(0), i.InputRegister(1));
1230       break;
1231     case kArm64Udiv32:
1232       __ Udiv(i.OutputRegister32(), i.InputRegister32(0), i.InputRegister32(1));
1233       break;
1234     case kArm64Imod: {
1235       UseScratchRegisterScope scope(tasm());
1236       Register temp = scope.AcquireX();
1237       __ Sdiv(temp, i.InputRegister(0), i.InputRegister(1));
1238       __ Msub(i.OutputRegister(), temp, i.InputRegister(1), i.InputRegister(0));
1239       break;
1240     }
1241     case kArm64Imod32: {
1242       UseScratchRegisterScope scope(tasm());
1243       Register temp = scope.AcquireW();
1244       __ Sdiv(temp, i.InputRegister32(0), i.InputRegister32(1));
1245       __ Msub(i.OutputRegister32(), temp, i.InputRegister32(1),
1246               i.InputRegister32(0));
1247       break;
1248     }
1249     case kArm64Umod: {
1250       UseScratchRegisterScope scope(tasm());
1251       Register temp = scope.AcquireX();
1252       __ Udiv(temp, i.InputRegister(0), i.InputRegister(1));
1253       __ Msub(i.OutputRegister(), temp, i.InputRegister(1), i.InputRegister(0));
1254       break;
1255     }
1256     case kArm64Umod32: {
1257       UseScratchRegisterScope scope(tasm());
1258       Register temp = scope.AcquireW();
1259       __ Udiv(temp, i.InputRegister32(0), i.InputRegister32(1));
1260       __ Msub(i.OutputRegister32(), temp, i.InputRegister32(1),
1261               i.InputRegister32(0));
1262       break;
1263     }
1264     case kArm64Not:
1265       __ Mvn(i.OutputRegister(), i.InputOperand(0));
1266       break;
1267     case kArm64Not32:
1268       __ Mvn(i.OutputRegister32(), i.InputOperand32(0));
1269       break;
1270     case kArm64Or:
1271       __ Orr(i.OutputRegister(), i.InputOrZeroRegister64(0),
1272              i.InputOperand2_64(1));
1273       break;
1274     case kArm64Or32:
1275       __ Orr(i.OutputRegister32(), i.InputOrZeroRegister32(0),
1276              i.InputOperand2_32(1));
1277       break;
1278     case kArm64Orn:
1279       __ Orn(i.OutputRegister(), i.InputOrZeroRegister64(0),
1280              i.InputOperand2_64(1));
1281       break;
1282     case kArm64Orn32:
1283       __ Orn(i.OutputRegister32(), i.InputOrZeroRegister32(0),
1284              i.InputOperand2_32(1));
1285       break;
1286     case kArm64Eor:
1287       __ Eor(i.OutputRegister(), i.InputOrZeroRegister64(0),
1288              i.InputOperand2_64(1));
1289       break;
1290     case kArm64Eor32:
1291       __ Eor(i.OutputRegister32(), i.InputOrZeroRegister32(0),
1292              i.InputOperand2_32(1));
1293       break;
1294     case kArm64Eon:
1295       __ Eon(i.OutputRegister(), i.InputOrZeroRegister64(0),
1296              i.InputOperand2_64(1));
1297       break;
1298     case kArm64Eon32:
1299       __ Eon(i.OutputRegister32(), i.InputOrZeroRegister32(0),
1300              i.InputOperand2_32(1));
1301       break;
1302     case kArm64Sub:
1303       if (FlagsModeField::decode(opcode) != kFlags_none) {
1304         __ Subs(i.OutputRegister(), i.InputOrZeroRegister64(0),
1305                 i.InputOperand2_64(1));
1306       } else {
1307         __ Sub(i.OutputRegister(), i.InputOrZeroRegister64(0),
1308                i.InputOperand2_64(1));
1309       }
1310       break;
1311     case kArm64Sub32:
1312       if (FlagsModeField::decode(opcode) != kFlags_none) {
1313         __ Subs(i.OutputRegister32(), i.InputOrZeroRegister32(0),
1314                 i.InputOperand2_32(1));
1315       } else {
1316         __ Sub(i.OutputRegister32(), i.InputOrZeroRegister32(0),
1317                i.InputOperand2_32(1));
1318       }
1319       break;
1320     case kArm64Lsl:
1321       ASSEMBLE_SHIFT(Lsl, 64);
1322       break;
1323     case kArm64Lsl32:
1324       ASSEMBLE_SHIFT(Lsl, 32);
1325       break;
1326     case kArm64Lsr:
1327       ASSEMBLE_SHIFT(Lsr, 64);
1328       break;
1329     case kArm64Lsr32:
1330       ASSEMBLE_SHIFT(Lsr, 32);
1331       break;
1332     case kArm64Asr:
1333       ASSEMBLE_SHIFT(Asr, 64);
1334       break;
1335     case kArm64Asr32:
1336       ASSEMBLE_SHIFT(Asr, 32);
1337       break;
1338     case kArm64Ror:
1339       ASSEMBLE_SHIFT(Ror, 64);
1340       break;
1341     case kArm64Ror32:
1342       ASSEMBLE_SHIFT(Ror, 32);
1343       break;
1344     case kArm64Mov32:
1345       __ Mov(i.OutputRegister32(), i.InputRegister32(0));
1346       break;
1347     case kArm64Sxtb32:
1348       __ Sxtb(i.OutputRegister32(), i.InputRegister32(0));
1349       break;
1350     case kArm64Sxth32:
1351       __ Sxth(i.OutputRegister32(), i.InputRegister32(0));
1352       break;
1353     case kArm64Sxtb:
1354       __ Sxtb(i.OutputRegister(), i.InputRegister32(0));
1355       break;
1356     case kArm64Sxth:
1357       __ Sxth(i.OutputRegister(), i.InputRegister32(0));
1358       break;
1359     case kArm64Sxtw:
1360       __ Sxtw(i.OutputRegister(), i.InputRegister32(0));
1361       break;
1362     case kArm64Sbfx:
1363       __ Sbfx(i.OutputRegister(), i.InputRegister(0), i.InputInt6(1),
1364               i.InputInt6(2));
1365       break;
1366     case kArm64Sbfx32:
1367       __ Sbfx(i.OutputRegister32(), i.InputRegister32(0), i.InputInt5(1),
1368               i.InputInt5(2));
1369       break;
1370     case kArm64Ubfx:
1371       __ Ubfx(i.OutputRegister(), i.InputRegister(0), i.InputInt6(1),
1372               i.InputInt32(2));
1373       break;
1374     case kArm64Ubfx32:
1375       __ Ubfx(i.OutputRegister32(), i.InputRegister32(0), i.InputInt5(1),
1376               i.InputInt32(2));
1377       break;
1378     case kArm64Ubfiz32:
1379       __ Ubfiz(i.OutputRegister32(), i.InputRegister32(0), i.InputInt5(1),
1380                i.InputInt5(2));
1381       break;
1382     case kArm64Bfi:
1383       __ Bfi(i.OutputRegister(), i.InputRegister(1), i.InputInt6(2),
1384              i.InputInt6(3));
1385       break;
1386     case kArm64TestAndBranch32:
1387     case kArm64TestAndBranch:
1388       // Pseudo instructions turned into tbz/tbnz in AssembleArchBranch.
1389       break;
1390     case kArm64CompareAndBranch32:
1391     case kArm64CompareAndBranch:
1392       // Pseudo instruction handled in AssembleArchBranch.
1393       break;
1394     case kArm64Claim: {
1395       int count = i.InputInt32(0);
1396       DCHECK_EQ(count % 2, 0);
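      // Claim works in pointer-size (8-byte) slots, so an even count keeps
      // sp 16-byte aligned across the allocation.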
1397       __ AssertSpAligned();
1398       if (count > 0) {
1399         __ Claim(count);
1400         frame_access_state()->IncreaseSPDelta(count);
1401       }
1402       break;
1403     }
1404     case kArm64Poke: {
1405       Operand operand(i.InputInt32(1) * kSystemPointerSize);
1406       if (instr->InputAt(0)->IsSimd128Register()) {
1407         __ Poke(i.InputSimd128Register(0), operand);
1408       } else if (instr->InputAt(0)->IsFPRegister()) {
1409         __ Poke(i.InputFloat64Register(0), operand);
1410       } else {
1411         __ Poke(i.InputOrZeroRegister64(0), operand);
1412       }
1413       break;
1414     }
1415     case kArm64PokePair: {
1416       int slot = i.InputInt32(2) - 1;
1417       if (instr->InputAt(0)->IsFPRegister()) {
1418         __ PokePair(i.InputFloat64Register(1), i.InputFloat64Register(0),
1419                     slot * kSystemPointerSize);
1420       } else {
1421         __ PokePair(i.InputRegister(1), i.InputRegister(0),
1422                     slot * kSystemPointerSize);
1423       }
1424       break;
1425     }
1426     case kArm64Peek: {
1427       int reverse_slot = i.InputInt32(0);
1428       int offset =
1429           FrameSlotToFPOffset(frame()->GetTotalFrameSlotCount() - reverse_slot);
1430       if (instr->OutputAt(0)->IsFPRegister()) {
1431         LocationOperand* op = LocationOperand::cast(instr->OutputAt(0));
1432         if (op->representation() == MachineRepresentation::kFloat64) {
1433           __ Ldr(i.OutputDoubleRegister(), MemOperand(fp, offset));
1434         } else if (op->representation() == MachineRepresentation::kFloat32) {
1435           __ Ldr(i.OutputFloatRegister(), MemOperand(fp, offset));
1436         } else {
1437           DCHECK_EQ(MachineRepresentation::kSimd128, op->representation());
1438           __ Ldr(i.OutputSimd128Register(), MemOperand(fp, offset));
1439         }
1440       } else {
1441         __ Ldr(i.OutputRegister(), MemOperand(fp, offset));
1442       }
1443       break;
1444     }
1445     case kArm64Clz:
1446       __ Clz(i.OutputRegister64(), i.InputRegister64(0));
1447       break;
1448     case kArm64Clz32:
1449       __ Clz(i.OutputRegister32(), i.InputRegister32(0));
1450       break;
1451     case kArm64Rbit:
1452       __ Rbit(i.OutputRegister64(), i.InputRegister64(0));
1453       break;
1454     case kArm64Rbit32:
1455       __ Rbit(i.OutputRegister32(), i.InputRegister32(0));
1456       break;
1457     case kArm64Rev:
1458       __ Rev(i.OutputRegister64(), i.InputRegister64(0));
1459       break;
1460     case kArm64Rev32:
1461       __ Rev(i.OutputRegister32(), i.InputRegister32(0));
1462       break;
1463     case kArm64Cmp:
1464       __ Cmp(i.InputOrZeroRegister64(0), i.InputOperand2_64(1));
1465       break;
1466     case kArm64Cmp32:
1467       __ Cmp(i.InputOrZeroRegister32(0), i.InputOperand2_32(1));
1468       break;
1469     case kArm64Cmn:
1470       __ Cmn(i.InputOrZeroRegister64(0), i.InputOperand2_64(1));
1471       break;
1472     case kArm64Cmn32:
1473       __ Cmn(i.InputOrZeroRegister32(0), i.InputOperand2_32(1));
1474       break;
1475     case kArm64Cnt: {
1476       VectorFormat f = VectorFormatFillQ(MiscField::decode(opcode));
1477       __ Cnt(i.OutputSimd128Register().Format(f),
1478              i.InputSimd128Register(0).Format(f));
1479       break;
1480     }
1481     case kArm64Tst:
1482       __ Tst(i.InputOrZeroRegister64(0), i.InputOperand2_64(1));
1483       break;
1484     case kArm64Tst32:
1485       __ Tst(i.InputOrZeroRegister32(0), i.InputOperand2_32(1));
1486       break;
1487     case kArm64Float32Cmp:
1488       if (instr->InputAt(1)->IsFPRegister()) {
1489         __ Fcmp(i.InputFloat32Register(0), i.InputFloat32Register(1));
1490       } else {
1491         DCHECK(instr->InputAt(1)->IsImmediate());
1492         // 0.0 is the only immediate supported by fcmp instructions.
1493         DCHECK_EQ(0.0f, i.InputFloat32(1));
1494         __ Fcmp(i.InputFloat32Register(0), i.InputFloat32(1));
1495       }
1496       break;
1497     case kArm64Float32Add:
1498       __ Fadd(i.OutputFloat32Register(), i.InputFloat32Register(0),
1499               i.InputFloat32Register(1));
1500       break;
1501     case kArm64Float32Sub:
1502       __ Fsub(i.OutputFloat32Register(), i.InputFloat32Register(0),
1503               i.InputFloat32Register(1));
1504       break;
1505     case kArm64Float32Mul:
1506       __ Fmul(i.OutputFloat32Register(), i.InputFloat32Register(0),
1507               i.InputFloat32Register(1));
1508       break;
1509     case kArm64Float32Div:
1510       __ Fdiv(i.OutputFloat32Register(), i.InputFloat32Register(0),
1511               i.InputFloat32Register(1));
1512       break;
1513     case kArm64Float32Abs:
1514       __ Fabs(i.OutputFloat32Register(), i.InputFloat32Register(0));
1515       break;
1516     case kArm64Float32Neg:
1517       __ Fneg(i.OutputFloat32Register(), i.InputFloat32Register(0));
1518       break;
1519     case kArm64Float32Sqrt:
1520       __ Fsqrt(i.OutputFloat32Register(), i.InputFloat32Register(0));
1521       break;
1522     case kArm64Float32Fnmul: {
1523       __ Fnmul(i.OutputFloat32Register(), i.InputFloat32Register(0),
1524                i.InputFloat32Register(1));
1525       break;
1526     }
1527     case kArm64Float64Cmp:
1528       if (instr->InputAt(1)->IsFPRegister()) {
1529         __ Fcmp(i.InputDoubleRegister(0), i.InputDoubleRegister(1));
1530       } else {
1531         DCHECK(instr->InputAt(1)->IsImmediate());
1532         // 0.0 is the only immediate supported by fcmp instructions.
1533         DCHECK_EQ(0.0, i.InputDouble(1));
1534         __ Fcmp(i.InputDoubleRegister(0), i.InputDouble(1));
1535       }
1536       break;
1537     case kArm64Float64Add:
1538       __ Fadd(i.OutputDoubleRegister(), i.InputDoubleRegister(0),
1539               i.InputDoubleRegister(1));
1540       break;
1541     case kArm64Float64Sub:
1542       __ Fsub(i.OutputDoubleRegister(), i.InputDoubleRegister(0),
1543               i.InputDoubleRegister(1));
1544       break;
1545     case kArm64Float64Mul:
1546       __ Fmul(i.OutputDoubleRegister(), i.InputDoubleRegister(0),
1547               i.InputDoubleRegister(1));
1548       break;
1549     case kArm64Float64Div:
1550       __ Fdiv(i.OutputDoubleRegister(), i.InputDoubleRegister(0),
1551               i.InputDoubleRegister(1));
1552       break;
1553     case kArm64Float64Mod: {
1554       // TODO(turbofan): implement directly.
1555       FrameScope scope(tasm(), StackFrame::MANUAL);
1556       DCHECK_EQ(d0, i.InputDoubleRegister(0));
1557       DCHECK_EQ(d1, i.InputDoubleRegister(1));
1558       DCHECK_EQ(d0, i.OutputDoubleRegister());
1559       // TODO(turbofan): make sure this saves all relevant registers.
1560       __ CallCFunction(ExternalReference::mod_two_doubles_operation(), 0, 2);
1561       break;
1562     }
1563     case kArm64Float32Max: {
1564       __ Fmax(i.OutputFloat32Register(), i.InputFloat32Register(0),
1565               i.InputFloat32Register(1));
1566       break;
1567     }
1568     case kArm64Float64Max: {
1569       __ Fmax(i.OutputDoubleRegister(), i.InputDoubleRegister(0),
1570               i.InputDoubleRegister(1));
1571       break;
1572     }
1573     case kArm64Float32Min: {
1574       __ Fmin(i.OutputFloat32Register(), i.InputFloat32Register(0),
1575               i.InputFloat32Register(1));
1576       break;
1577     }
1578     case kArm64Float64Min: {
1579       __ Fmin(i.OutputDoubleRegister(), i.InputDoubleRegister(0),
1580               i.InputDoubleRegister(1));
1581       break;
1582     }
1583     case kArm64Float64Abs:
1584       __ Fabs(i.OutputDoubleRegister(), i.InputDoubleRegister(0));
1585       break;
1586     case kArm64Float64Neg:
1587       __ Fneg(i.OutputDoubleRegister(), i.InputDoubleRegister(0));
1588       break;
1589     case kArm64Float64Sqrt:
1590       __ Fsqrt(i.OutputDoubleRegister(), i.InputDoubleRegister(0));
1591       break;
1592     case kArm64Float64Fnmul:
1593       __ Fnmul(i.OutputDoubleRegister(), i.InputDoubleRegister(0),
1594                i.InputDoubleRegister(1));
1595       break;
1596     case kArm64Float32ToFloat64:
1597       __ Fcvt(i.OutputDoubleRegister(), i.InputDoubleRegister(0).S());
1598       break;
1599     case kArm64Float64ToFloat32:
1600       __ Fcvt(i.OutputDoubleRegister().S(), i.InputDoubleRegister(0));
1601       break;
1602     case kArm64Float32ToInt32: {
1603       __ Fcvtzs(i.OutputRegister32(), i.InputFloat32Register(0));
1604       bool set_overflow_to_min_i32 = MiscField::decode(instr->opcode());
1605       if (set_overflow_to_min_i32) {
1606         // Avoid INT32_MAX as an overflow indicator and use INT32_MIN instead,
1607         // because INT32_MIN allows easier out-of-bounds detection.
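        // A too-large input saturates to INT32_MAX above; Cmn(result, 1) then
        // sets the V flag (INT32_MAX + 1 overflows) and Csinc bumps the result
        // to INT32_MIN. In-range results pass through unchanged.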
1608         __ Cmn(i.OutputRegister32(), 1);
1609         __ Csinc(i.OutputRegister32(), i.OutputRegister32(),
1610                  i.OutputRegister32(), vc);
1611       }
1612       break;
1613     }
1614     case kArm64Float64ToInt32:
1615       __ Fcvtzs(i.OutputRegister32(), i.InputDoubleRegister(0));
1616       break;
1617     case kArm64Float32ToUint32: {
1618       __ Fcvtzu(i.OutputRegister32(), i.InputFloat32Register(0));
1619       bool set_overflow_to_min_u32 = MiscField::decode(instr->opcode());
1620       if (set_overflow_to_min_u32) {
1621         // Avoid UINT32_MAX as an overflow indicator and use 0 instead,
1622         // because 0 allows easier out-of-bounds detection.
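        // A too-large input saturates to UINT32_MAX above; Cmn(result, 1) then
        // sets the carry flag and Adc wraps the result around to 0. In-range
        // results are unchanged because the carry stays clear.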
1623         __ Cmn(i.OutputRegister32(), 1);
1624         __ Adc(i.OutputRegister32(), i.OutputRegister32(), Operand(0));
1625       }
1626       break;
1627     }
1628     case kArm64Float64ToUint32:
1629       __ Fcvtzu(i.OutputRegister32(), i.InputDoubleRegister(0));
1630       break;
1631     case kArm64Float32ToInt64:
1632       __ Fcvtzs(i.OutputRegister64(), i.InputFloat32Register(0));
1633       if (i.OutputCount() > 1) {
1634         // Check for inputs below INT64_MIN and NaN.
1635         __ Fcmp(i.InputFloat32Register(0), static_cast<float>(INT64_MIN));
1636         // Check overflow.
        // Comparing the result with -1 (i.e. subtracting -1) overflows only
        // when the result is INT64_MAX, the value Fcvtzs saturates to for
        // too-large inputs.
1639         // OutputRegister(1) is set to 0 if the input was out of range or NaN.
1640         __ Ccmp(i.OutputRegister(0), -1, VFlag, ge);
1641         __ Cset(i.OutputRegister(1), vc);
1642       }
1643       break;
1644     case kArm64Float64ToInt64:
1645       __ Fcvtzs(i.OutputRegister(0), i.InputDoubleRegister(0));
1646       if (i.OutputCount() > 1) {
1647         // See kArm64Float32ToInt64 for a detailed description.
1648         __ Fcmp(i.InputDoubleRegister(0), static_cast<double>(INT64_MIN));
1649         __ Ccmp(i.OutputRegister(0), -1, VFlag, ge);
1650         __ Cset(i.OutputRegister(1), vc);
1651       }
1652       break;
1653     case kArm64Float32ToUint64:
1654       __ Fcvtzu(i.OutputRegister64(), i.InputFloat32Register(0));
1655       if (i.OutputCount() > 1) {
1656         // See kArm64Float32ToInt64 for a detailed description.
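        // Success requires the input to compare greater than -1.0 (NaN fails
        // the unordered compare) and the result not to be UINT64_MAX, the
        // value Fcvtzu saturates to for too-large inputs.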
1657         __ Fcmp(i.InputFloat32Register(0), -1.0);
1658         __ Ccmp(i.OutputRegister(0), -1, ZFlag, gt);
1659         __ Cset(i.OutputRegister(1), ne);
1660       }
1661       break;
1662     case kArm64Float64ToUint64:
1663       __ Fcvtzu(i.OutputRegister64(), i.InputDoubleRegister(0));
1664       if (i.OutputCount() > 1) {
1665         // See kArm64Float32ToInt64 for a detailed description.
1666         __ Fcmp(i.InputDoubleRegister(0), -1.0);
1667         __ Ccmp(i.OutputRegister(0), -1, ZFlag, gt);
1668         __ Cset(i.OutputRegister(1), ne);
1669       }
1670       break;
1671     case kArm64Int32ToFloat32:
1672       __ Scvtf(i.OutputFloat32Register(), i.InputRegister32(0));
1673       break;
1674     case kArm64Int32ToFloat64:
1675       __ Scvtf(i.OutputDoubleRegister(), i.InputRegister32(0));
1676       break;
1677     case kArm64Int64ToFloat32:
1678       __ Scvtf(i.OutputDoubleRegister().S(), i.InputRegister64(0));
1679       break;
1680     case kArm64Int64ToFloat64:
1681       __ Scvtf(i.OutputDoubleRegister(), i.InputRegister64(0));
1682       break;
1683     case kArm64Uint32ToFloat32:
1684       __ Ucvtf(i.OutputFloat32Register(), i.InputRegister32(0));
1685       break;
1686     case kArm64Uint32ToFloat64:
1687       __ Ucvtf(i.OutputDoubleRegister(), i.InputRegister32(0));
1688       break;
1689     case kArm64Uint64ToFloat32:
1690       __ Ucvtf(i.OutputDoubleRegister().S(), i.InputRegister64(0));
1691       break;
1692     case kArm64Uint64ToFloat64:
1693       __ Ucvtf(i.OutputDoubleRegister(), i.InputRegister64(0));
1694       break;
1695     case kArm64Float64ExtractLowWord32:
1696       __ Fmov(i.OutputRegister32(), i.InputFloat32Register(0));
1697       break;
1698     case kArm64Float64ExtractHighWord32:
1699       __ Umov(i.OutputRegister32(), i.InputFloat64Register(0).V2S(), 1);
1700       break;
1701     case kArm64Float64InsertLowWord32:
1702       DCHECK_EQ(i.OutputFloat64Register(), i.InputFloat64Register(0));
1703       __ Ins(i.OutputFloat64Register().V2S(), 0, i.InputRegister32(1));
1704       break;
1705     case kArm64Float64InsertHighWord32:
1706       DCHECK_EQ(i.OutputFloat64Register(), i.InputFloat64Register(0));
1707       __ Ins(i.OutputFloat64Register().V2S(), 1, i.InputRegister32(1));
1708       break;
1709     case kArm64Float64MoveU64:
1710       __ Fmov(i.OutputFloat64Register(), i.InputRegister(0));
1711       break;
1712     case kArm64Float64SilenceNaN:
1713       __ CanonicalizeNaN(i.OutputDoubleRegister(), i.InputDoubleRegister(0));
1714       break;
1715     case kArm64U64MoveFloat64:
1716       __ Fmov(i.OutputRegister(), i.InputDoubleRegister(0));
1717       break;
1718     case kArm64Ldrb:
1719       __ Ldrb(i.OutputRegister(), i.MemoryOperand());
1720       EmitWordLoadPoisoningIfNeeded(this, opcode, instr, i);
1721       break;
1722     case kArm64Ldrsb:
1723       __ Ldrsb(i.OutputRegister(), i.MemoryOperand());
1724       EmitWordLoadPoisoningIfNeeded(this, opcode, instr, i);
1725       break;
1726     case kArm64Strb:
1727       __ Strb(i.InputOrZeroRegister64(0), i.MemoryOperand(1));
1728       break;
1729     case kArm64Ldrh:
1730       __ Ldrh(i.OutputRegister(), i.MemoryOperand());
1731       EmitWordLoadPoisoningIfNeeded(this, opcode, instr, i);
1732       break;
1733     case kArm64Ldrsh:
1734       __ Ldrsh(i.OutputRegister(), i.MemoryOperand());
1735       EmitWordLoadPoisoningIfNeeded(this, opcode, instr, i);
1736       break;
1737     case kArm64Strh:
1738       __ Strh(i.InputOrZeroRegister64(0), i.MemoryOperand(1));
1739       break;
1740     case kArm64Ldrsw:
1741       __ Ldrsw(i.OutputRegister(), i.MemoryOperand());
1742       EmitWordLoadPoisoningIfNeeded(this, opcode, instr, i);
1743       break;
1744     case kArm64LdrW:
1745       __ Ldr(i.OutputRegister32(), i.MemoryOperand());
1746       EmitWordLoadPoisoningIfNeeded(this, opcode, instr, i);
1747       break;
1748     case kArm64StrW:
1749       __ Str(i.InputOrZeroRegister32(0), i.MemoryOperand(1));
1750       break;
1751     case kArm64Ldr:
1752       __ Ldr(i.OutputRegister(), i.MemoryOperand());
1753       EmitWordLoadPoisoningIfNeeded(this, opcode, instr, i);
1754       break;
1755     case kArm64LdrDecompressTaggedSigned:
1756       __ DecompressTaggedSigned(i.OutputRegister(), i.MemoryOperand());
1757       EmitWordLoadPoisoningIfNeeded(this, opcode, instr, i);
1758       break;
1759     case kArm64LdrDecompressTaggedPointer:
1760       __ DecompressTaggedPointer(i.OutputRegister(), i.MemoryOperand());
1761       EmitWordLoadPoisoningIfNeeded(this, opcode, instr, i);
1762       break;
1763     case kArm64LdrDecompressAnyTagged:
1764       __ DecompressAnyTagged(i.OutputRegister(), i.MemoryOperand());
1765       EmitWordLoadPoisoningIfNeeded(this, opcode, instr, i);
1766       break;
1767     case kArm64Str:
1768       __ Str(i.InputOrZeroRegister64(0), i.MemoryOperand(1));
1769       break;
1770     case kArm64StrCompressTagged:
1771       __ StoreTaggedField(i.InputOrZeroRegister64(0), i.MemoryOperand(1));
1772       break;
1773     case kArm64LdrS:
1774       EmitMaybePoisonedFPLoad(this, opcode, &i, i.OutputDoubleRegister().S());
1775       break;
1776     case kArm64StrS:
1777       __ Str(i.InputFloat32OrZeroRegister(0), i.MemoryOperand(1));
1778       break;
1779     case kArm64LdrD:
1780       EmitMaybePoisonedFPLoad(this, opcode, &i, i.OutputDoubleRegister());
1781       break;
1782     case kArm64StrD:
1783       __ Str(i.InputFloat64OrZeroRegister(0), i.MemoryOperand(1));
1784       break;
1785     case kArm64LdrQ:
1786       __ Ldr(i.OutputSimd128Register(), i.MemoryOperand());
1787       break;
1788     case kArm64StrQ:
1789       __ Str(i.InputSimd128Register(0), i.MemoryOperand(1));
1790       break;
1791     case kArm64DmbIsh:
1792       __ Dmb(InnerShareable, BarrierAll);
1793       break;
1794     case kArm64DsbIsb:
1795       __ Dsb(FullSystem, BarrierAll);
1796       __ Isb();
1797       break;
1798     case kArchWordPoisonOnSpeculation:
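      // kSpeculationPoisonRegister is all ones on correctly speculated paths
      // and zero after a misprediction, so this And zeroes values loaded
      // under misspeculation.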
1799       __ And(i.OutputRegister(0), i.InputRegister(0),
1800              Operand(kSpeculationPoisonRegister));
1801       break;
1802     case kWord32AtomicLoadInt8:
1803       ASSEMBLE_ATOMIC_LOAD_INTEGER(Ldarb, Register32);
1804       __ Sxtb(i.OutputRegister(0), i.OutputRegister(0));
1805       break;
1806     case kWord32AtomicLoadUint8:
1807     case kArm64Word64AtomicLoadUint8:
1808       ASSEMBLE_ATOMIC_LOAD_INTEGER(Ldarb, Register32);
1809       break;
1810     case kWord32AtomicLoadInt16:
1811       ASSEMBLE_ATOMIC_LOAD_INTEGER(Ldarh, Register32);
1812       __ Sxth(i.OutputRegister(0), i.OutputRegister(0));
1813       break;
1814     case kWord32AtomicLoadUint16:
1815     case kArm64Word64AtomicLoadUint16:
1816       ASSEMBLE_ATOMIC_LOAD_INTEGER(Ldarh, Register32);
1817       break;
1818     case kWord32AtomicLoadWord32:
1819     case kArm64Word64AtomicLoadUint32:
1820       ASSEMBLE_ATOMIC_LOAD_INTEGER(Ldar, Register32);
1821       break;
1822     case kArm64Word64AtomicLoadUint64:
1823       ASSEMBLE_ATOMIC_LOAD_INTEGER(Ldar, Register);
1824       break;
1825     case kWord32AtomicStoreWord8:
1826     case kArm64Word64AtomicStoreWord8:
1827       ASSEMBLE_ATOMIC_STORE_INTEGER(Stlrb, Register32);
1828       break;
1829     case kWord32AtomicStoreWord16:
1830     case kArm64Word64AtomicStoreWord16:
1831       ASSEMBLE_ATOMIC_STORE_INTEGER(Stlrh, Register32);
1832       break;
1833     case kWord32AtomicStoreWord32:
1834     case kArm64Word64AtomicStoreWord32:
1835       ASSEMBLE_ATOMIC_STORE_INTEGER(Stlr, Register32);
1836       break;
1837     case kArm64Word64AtomicStoreWord64:
1838       ASSEMBLE_ATOMIC_STORE_INTEGER(Stlr, Register);
1839       break;
1840     case kWord32AtomicExchangeInt8:
1841       ASSEMBLE_ATOMIC_EXCHANGE_INTEGER(ldaxrb, stlxrb, Register32);
1842       __ Sxtb(i.OutputRegister(0), i.OutputRegister(0));
1843       break;
1844     case kWord32AtomicExchangeUint8:
1845     case kArm64Word64AtomicExchangeUint8:
1846       ASSEMBLE_ATOMIC_EXCHANGE_INTEGER(ldaxrb, stlxrb, Register32);
1847       break;
1848     case kWord32AtomicExchangeInt16:
1849       ASSEMBLE_ATOMIC_EXCHANGE_INTEGER(ldaxrh, stlxrh, Register32);
1850       __ Sxth(i.OutputRegister(0), i.OutputRegister(0));
1851       break;
1852     case kWord32AtomicExchangeUint16:
1853     case kArm64Word64AtomicExchangeUint16:
1854       ASSEMBLE_ATOMIC_EXCHANGE_INTEGER(ldaxrh, stlxrh, Register32);
1855       break;
1856     case kWord32AtomicExchangeWord32:
1857     case kArm64Word64AtomicExchangeUint32:
1858       ASSEMBLE_ATOMIC_EXCHANGE_INTEGER(ldaxr, stlxr, Register32);
1859       break;
1860     case kArm64Word64AtomicExchangeUint64:
1861       ASSEMBLE_ATOMIC_EXCHANGE_INTEGER(ldaxr, stlxr, Register);
1862       break;
1863     case kWord32AtomicCompareExchangeInt8:
1864       ASSEMBLE_ATOMIC_COMPARE_EXCHANGE_INTEGER(ldaxrb, stlxrb, UXTB,
1865                                                Register32);
1866       __ Sxtb(i.OutputRegister(0), i.OutputRegister(0));
1867       break;
1868     case kWord32AtomicCompareExchangeUint8:
1869     case kArm64Word64AtomicCompareExchangeUint8:
1870       ASSEMBLE_ATOMIC_COMPARE_EXCHANGE_INTEGER(ldaxrb, stlxrb, UXTB,
1871                                                Register32);
1872       break;
1873     case kWord32AtomicCompareExchangeInt16:
1874       ASSEMBLE_ATOMIC_COMPARE_EXCHANGE_INTEGER(ldaxrh, stlxrh, UXTH,
1875                                                Register32);
1876       __ Sxth(i.OutputRegister(0), i.OutputRegister(0));
1877       break;
1878     case kWord32AtomicCompareExchangeUint16:
1879     case kArm64Word64AtomicCompareExchangeUint16:
1880       ASSEMBLE_ATOMIC_COMPARE_EXCHANGE_INTEGER(ldaxrh, stlxrh, UXTH,
1881                                                Register32);
1882       break;
1883     case kWord32AtomicCompareExchangeWord32:
1884     case kArm64Word64AtomicCompareExchangeUint32:
1885       ASSEMBLE_ATOMIC_COMPARE_EXCHANGE_INTEGER(ldaxr, stlxr, UXTW, Register32);
1886       break;
1887     case kArm64Word64AtomicCompareExchangeUint64:
1888       ASSEMBLE_ATOMIC_COMPARE_EXCHANGE_INTEGER(ldaxr, stlxr, UXTX, Register);
1889       break;
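// Each ASSEMBLE_ATOMIC_BINOP expansion (defined earlier in this file) emits a
// load-exclusive / <inst> / store-exclusive retry loop using the given
// acquire/release instructions.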
1890 #define ATOMIC_BINOP_CASE(op, inst)                          \
1891   case kWord32Atomic##op##Int8:                              \
1892     ASSEMBLE_ATOMIC_BINOP(ldaxrb, stlxrb, inst, Register32); \
1893     __ Sxtb(i.OutputRegister(0), i.OutputRegister(0));       \
1894     break;                                                   \
1895   case kWord32Atomic##op##Uint8:                             \
1896   case kArm64Word64Atomic##op##Uint8:                        \
1897     ASSEMBLE_ATOMIC_BINOP(ldaxrb, stlxrb, inst, Register32); \
1898     break;                                                   \
1899   case kWord32Atomic##op##Int16:                             \
1900     ASSEMBLE_ATOMIC_BINOP(ldaxrh, stlxrh, inst, Register32); \
1901     __ Sxth(i.OutputRegister(0), i.OutputRegister(0));       \
1902     break;                                                   \
1903   case kWord32Atomic##op##Uint16:                            \
1904   case kArm64Word64Atomic##op##Uint16:                       \
1905     ASSEMBLE_ATOMIC_BINOP(ldaxrh, stlxrh, inst, Register32); \
1906     break;                                                   \
1907   case kWord32Atomic##op##Word32:                            \
1908   case kArm64Word64Atomic##op##Uint32:                       \
1909     ASSEMBLE_ATOMIC_BINOP(ldaxr, stlxr, inst, Register32);   \
1910     break;                                                   \
1911   case kArm64Word64Atomic##op##Uint64:                       \
1912     ASSEMBLE_ATOMIC_BINOP(ldaxr, stlxr, inst, Register);     \
1913     break;
1914       ATOMIC_BINOP_CASE(Add, Add)
1915       ATOMIC_BINOP_CASE(Sub, Sub)
1916       ATOMIC_BINOP_CASE(And, And)
1917       ATOMIC_BINOP_CASE(Or, Orr)
1918       ATOMIC_BINOP_CASE(Xor, Eor)
1919 #undef ATOMIC_BINOP_CASE
1920 #undef ASSEMBLE_SHIFT
1921 #undef ASSEMBLE_ATOMIC_LOAD_INTEGER
1922 #undef ASSEMBLE_ATOMIC_STORE_INTEGER
1923 #undef ASSEMBLE_ATOMIC_EXCHANGE_INTEGER
1924 #undef ASSEMBLE_ATOMIC_COMPARE_EXCHANGE_INTEGER
1925 #undef ASSEMBLE_ATOMIC_BINOP
1926 #undef ASSEMBLE_IEEE754_BINOP
1927 #undef ASSEMBLE_IEEE754_UNOP
1928 
1929 #define SIMD_UNOP_CASE(Op, Instr, FORMAT)            \
1930   case Op:                                           \
1931     __ Instr(i.OutputSimd128Register().V##FORMAT(),  \
1932              i.InputSimd128Register(0).V##FORMAT()); \
1933     break;
1934 #define SIMD_BINOP_CASE(Op, Instr, FORMAT)           \
1935   case Op:                                           \
1936     __ Instr(i.OutputSimd128Register().V##FORMAT(),  \
1937              i.InputSimd128Register(0).V##FORMAT(),  \
1938              i.InputSimd128Register(1).V##FORMAT()); \
1939     break;
1940 #define SIMD_DESTRUCTIVE_BINOP_CASE(Op, Instr, FORMAT)     \
1941   case Op: {                                               \
1942     VRegister dst = i.OutputSimd128Register().V##FORMAT(); \
1943     DCHECK_EQ(dst, i.InputSimd128Register(0).V##FORMAT()); \
1944     __ Instr(dst, i.InputSimd128Register(1).V##FORMAT(),   \
1945              i.InputSimd128Register(2).V##FORMAT());       \
1946     break;                                                 \
1947   }
1948 
1949     case kArm64Sxtl: {
1950       VectorFormat wide = VectorFormatFillQ(MiscField::decode(opcode));
1951       VectorFormat narrow = VectorFormatHalfWidth(wide);
1952       __ Sxtl(i.OutputSimd128Register().Format(wide),
1953               i.InputSimd128Register(0).Format(narrow));
1954       break;
1955     }
1956     case kArm64Sxtl2: {
1957       VectorFormat wide = VectorFormatFillQ(MiscField::decode(opcode));
1958       VectorFormat narrow = VectorFormatHalfWidthDoubleLanes(wide);
1959       __ Sxtl2(i.OutputSimd128Register().Format(wide),
1960                i.InputSimd128Register(0).Format(narrow));
1961       break;
1962     }
1963     case kArm64Uxtl: {
1964       VectorFormat wide = VectorFormatFillQ(MiscField::decode(opcode));
1965       VectorFormat narrow = VectorFormatHalfWidth(wide);
1966       __ Uxtl(i.OutputSimd128Register().Format(wide),
1967               i.InputSimd128Register(0).Format(narrow));
1968       break;
1969     }
1970     case kArm64Uxtl2: {
1971       VectorFormat wide = VectorFormatFillQ(MiscField::decode(opcode));
1972       VectorFormat narrow = VectorFormatHalfWidthDoubleLanes(wide);
1973       __ Uxtl2(i.OutputSimd128Register().Format(wide),
1974                i.InputSimd128Register(0).Format(narrow));
1975       break;
1976     }
1977     case kArm64F64x2Splat: {
1978       __ Dup(i.OutputSimd128Register().V2D(), i.InputSimd128Register(0).D(), 0);
1979       break;
1980     }
1981     case kArm64F64x2ExtractLane: {
1982       __ Mov(i.OutputSimd128Register().D(), i.InputSimd128Register(0).V2D(),
1983              i.InputInt8(1));
1984       break;
1985     }
1986     case kArm64F64x2ReplaceLane: {
1987       VRegister dst = i.OutputSimd128Register().V2D(),
1988                 src1 = i.InputSimd128Register(0).V2D();
1989       if (dst != src1) {
1990         __ Mov(dst, src1);
1991       }
1992       __ Mov(dst, i.InputInt8(1), i.InputSimd128Register(2).V2D(), 0);
1993       break;
1994     }
1995       SIMD_UNOP_CASE(kArm64F64x2Abs, Fabs, 2D);
1996       SIMD_UNOP_CASE(kArm64F64x2Neg, Fneg, 2D);
1997       SIMD_UNOP_CASE(kArm64F64x2Sqrt, Fsqrt, 2D);
1998       SIMD_BINOP_CASE(kArm64F64x2Add, Fadd, 2D);
1999       SIMD_BINOP_CASE(kArm64F64x2Sub, Fsub, 2D);
2000       SIMD_BINOP_CASE(kArm64F64x2Mul, Fmul, 2D);
2001       SIMD_BINOP_CASE(kArm64F64x2Div, Fdiv, 2D);
2002       SIMD_BINOP_CASE(kArm64F64x2Min, Fmin, 2D);
2003       SIMD_BINOP_CASE(kArm64F64x2Max, Fmax, 2D);
2004       SIMD_BINOP_CASE(kArm64F64x2Eq, Fcmeq, 2D);
2005     case kArm64F64x2Ne: {
2006       VRegister dst = i.OutputSimd128Register().V2D();
2007       __ Fcmeq(dst, i.InputSimd128Register(0).V2D(),
2008                i.InputSimd128Register(1).V2D());
2009       __ Mvn(dst, dst);
2010       break;
2011     }
2012     case kArm64F64x2Lt: {
2013       __ Fcmgt(i.OutputSimd128Register().V2D(), i.InputSimd128Register(1).V2D(),
2014                i.InputSimd128Register(0).V2D());
2015       break;
2016     }
2017     case kArm64F64x2Le: {
2018       __ Fcmge(i.OutputSimd128Register().V2D(), i.InputSimd128Register(1).V2D(),
2019                i.InputSimd128Register(0).V2D());
2020       break;
2021     }
2022       SIMD_DESTRUCTIVE_BINOP_CASE(kArm64F64x2Qfma, Fmla, 2D);
2023       SIMD_DESTRUCTIVE_BINOP_CASE(kArm64F64x2Qfms, Fmls, 2D);
2024     case kArm64F64x2Pmin: {
2025       VRegister dst = i.OutputSimd128Register().V2D();
2026       VRegister lhs = i.InputSimd128Register(0).V2D();
2027       VRegister rhs = i.InputSimd128Register(1).V2D();
2028       // f64x2.pmin(lhs, rhs)
      // = v128.bitselect(rhs, lhs, f64x2.lt(rhs, lhs))
      // = v128.bitselect(rhs, lhs, f64x2.gt(lhs, rhs))
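      // Bsl keeps rhs bits where the Fcmgt mask is set (lhs > rhs) and lhs
      // bits elsewhere, which yields the pseudo-minimum.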
2031       __ Fcmgt(dst, lhs, rhs);
2032       __ Bsl(dst.V16B(), rhs.V16B(), lhs.V16B());
2033       break;
2034     }
2035     case kArm64F64x2Pmax: {
2036       VRegister dst = i.OutputSimd128Register().V2D();
2037       VRegister lhs = i.InputSimd128Register(0).V2D();
2038       VRegister rhs = i.InputSimd128Register(1).V2D();
2039       // f64x2.pmax(lhs, rhs)
2040       // = v128.bitselect(rhs, lhs, f64x2.gt(rhs, lhs))
2041       __ Fcmgt(dst, rhs, lhs);
2042       __ Bsl(dst.V16B(), rhs.V16B(), lhs.V16B());
2043       break;
2044     }
2045     case kArm64F32x4Splat: {
2046       __ Dup(i.OutputSimd128Register().V4S(), i.InputSimd128Register(0).S(), 0);
2047       break;
2048     }
2049     case kArm64F32x4ExtractLane: {
2050       __ Mov(i.OutputSimd128Register().S(), i.InputSimd128Register(0).V4S(),
2051              i.InputInt8(1));
2052       break;
2053     }
2054     case kArm64F32x4ReplaceLane: {
2055       VRegister dst = i.OutputSimd128Register().V4S(),
2056                 src1 = i.InputSimd128Register(0).V4S();
2057       if (dst != src1) {
2058         __ Mov(dst, src1);
2059       }
2060       __ Mov(dst, i.InputInt8(1), i.InputSimd128Register(2).V4S(), 0);
2061       break;
2062     }
2063       SIMD_UNOP_CASE(kArm64F32x4SConvertI32x4, Scvtf, 4S);
2064       SIMD_UNOP_CASE(kArm64F32x4UConvertI32x4, Ucvtf, 4S);
2065       SIMD_UNOP_CASE(kArm64F32x4Abs, Fabs, 4S);
2066       SIMD_UNOP_CASE(kArm64F32x4Neg, Fneg, 4S);
2067       SIMD_UNOP_CASE(kArm64F32x4Sqrt, Fsqrt, 4S);
2068       SIMD_UNOP_CASE(kArm64F32x4RecipApprox, Frecpe, 4S);
2069       SIMD_UNOP_CASE(kArm64F32x4RecipSqrtApprox, Frsqrte, 4S);
2070       SIMD_BINOP_CASE(kArm64F32x4Add, Fadd, 4S);
2071       SIMD_BINOP_CASE(kArm64F32x4AddHoriz, Faddp, 4S);
2072       SIMD_BINOP_CASE(kArm64F32x4Sub, Fsub, 4S);
2073       SIMD_BINOP_CASE(kArm64F32x4Mul, Fmul, 4S);
2074       SIMD_BINOP_CASE(kArm64F32x4Div, Fdiv, 4S);
2075       SIMD_BINOP_CASE(kArm64F32x4Min, Fmin, 4S);
2076       SIMD_BINOP_CASE(kArm64F32x4Max, Fmax, 4S);
2077       SIMD_BINOP_CASE(kArm64F32x4Eq, Fcmeq, 4S);
2078     case kArm64F32x4Ne: {
2079       VRegister dst = i.OutputSimd128Register().V4S();
2080       __ Fcmeq(dst, i.InputSimd128Register(0).V4S(),
2081                i.InputSimd128Register(1).V4S());
2082       __ Mvn(dst, dst);
2083       break;
2084     }
2085     case kArm64F32x4Lt: {
2086       __ Fcmgt(i.OutputSimd128Register().V4S(), i.InputSimd128Register(1).V4S(),
2087                i.InputSimd128Register(0).V4S());
2088       break;
2089     }
2090     case kArm64F32x4Le: {
2091       __ Fcmge(i.OutputSimd128Register().V4S(), i.InputSimd128Register(1).V4S(),
2092                i.InputSimd128Register(0).V4S());
2093       break;
2094     }
2095       SIMD_DESTRUCTIVE_BINOP_CASE(kArm64F32x4Qfma, Fmla, 4S);
2096       SIMD_DESTRUCTIVE_BINOP_CASE(kArm64F32x4Qfms, Fmls, 4S);
2097     case kArm64F32x4Pmin: {
2098       VRegister dst = i.OutputSimd128Register().V4S();
2099       VRegister lhs = i.InputSimd128Register(0).V4S();
2100       VRegister rhs = i.InputSimd128Register(1).V4S();
2101       // f32x4.pmin(lhs, rhs)
2102       // = v128.bitselect(rhs, lhs, f32x4.lt(rhs, lhs))
2103       // = v128.bitselect(rhs, lhs, f32x4.gt(lhs, rhs))
2104       __ Fcmgt(dst, lhs, rhs);
2105       __ Bsl(dst.V16B(), rhs.V16B(), lhs.V16B());
2106       break;
2107     }
2108     case kArm64F32x4Pmax: {
2109       VRegister dst = i.OutputSimd128Register().V4S();
2110       VRegister lhs = i.InputSimd128Register(0).V4S();
2111       VRegister rhs = i.InputSimd128Register(1).V4S();
2112       // f32x4.pmax(lhs, rhs)
2113       // = v128.bitselect(rhs, lhs, f32x4.gt(rhs, lhs))
2114       __ Fcmgt(dst, rhs, lhs);
2115       __ Bsl(dst.V16B(), rhs.V16B(), lhs.V16B());
2116       break;
2117     }
2118     case kArm64I64x2Splat: {
2119       __ Dup(i.OutputSimd128Register().V2D(), i.InputRegister64(0));
2120       break;
2121     }
2122     case kArm64I64x2ExtractLane: {
2123       __ Mov(i.OutputRegister64(), i.InputSimd128Register(0).V2D(),
2124              i.InputInt8(1));
2125       break;
2126     }
2127     case kArm64I64x2ReplaceLane: {
2128       VRegister dst = i.OutputSimd128Register().V2D(),
2129                 src1 = i.InputSimd128Register(0).V2D();
2130       if (dst != src1) {
2131         __ Mov(dst, src1);
2132       }
2133       __ Mov(dst, i.InputInt8(1), i.InputRegister64(2));
2134       break;
2135     }
2136       SIMD_UNOP_CASE(kArm64I64x2Neg, Neg, 2D);
2137     case kArm64I64x2Shl: {
2138       ASSEMBLE_SIMD_SHIFT_LEFT(Shl, 6, V2D, Sshl, X);
2139       break;
2140     }
2141     case kArm64I64x2ShrS: {
2142       ASSEMBLE_SIMD_SHIFT_RIGHT(Sshr, 6, V2D, Sshl, X);
2143       break;
2144     }
2145       SIMD_BINOP_CASE(kArm64I64x2Add, Add, 2D);
2146       SIMD_BINOP_CASE(kArm64I64x2Sub, Sub, 2D);
2147     case kArm64I64x2Mul: {
2148       UseScratchRegisterScope scope(tasm());
2149       VRegister dst = i.OutputSimd128Register();
2150       VRegister src1 = i.InputSimd128Register(0);
2151       VRegister src2 = i.InputSimd128Register(1);
2152       VRegister tmp1 = scope.AcquireSameSizeAs(dst);
2153       VRegister tmp2 = scope.AcquireSameSizeAs(dst);
2154       VRegister tmp3 = i.ToSimd128Register(instr->TempAt(0));
2155 
2156       // This 2x64-bit multiplication is performed with several 32-bit
2157       // multiplications.
2158 
      // 64-bit numbers x and y can be represented as:
2160       //   x = a + 2^32(b)
2161       //   y = c + 2^32(d)
2162 
2163       // A 64-bit multiplication is:
2164       //   x * y = ac + 2^32(ad + bc) + 2^64(bd)
      // Note: the `2^64(bd)` term can be ignored; it is too large to fit in
      // 64 bits.
2167 
2168       // This sequence implements a 2x64bit multiply, where the registers
2169       // `src1` and `src2` are split up into 32-bit components:
2170       //   src1 = |d|c|b|a|
2171       //   src2 = |h|g|f|e|
2172       //
2173       //   src1 * src2 = |cg + 2^32(ch + dg)|ae + 2^32(af + be)|
2174 
2175       // Reverse the 32-bit elements in the 64-bit words.
2176       //   tmp2 = |g|h|e|f|
2177       __ Rev64(tmp2.V4S(), src2.V4S());
2178 
2179       // Calculate the high half components.
2180       //   tmp2 = |dg|ch|be|af|
2181       __ Mul(tmp2.V4S(), tmp2.V4S(), src1.V4S());
2182 
2183       // Extract the low half components of src1.
2184       //   tmp1 = |c|a|
2185       __ Xtn(tmp1.V2S(), src1.V2D());
2186 
2187       // Sum the respective high half components.
2188       //   tmp2 = |dg+ch|be+af||dg+ch|be+af|
2189       __ Addp(tmp2.V4S(), tmp2.V4S(), tmp2.V4S());
2190 
2191       // Extract the low half components of src2.
2192       //   tmp3 = |g|e|
2193       __ Xtn(tmp3.V2S(), src2.V2D());
2194 
2195       // Shift the high half components, into the high half.
2196       //   dst = |dg+ch << 32|be+af << 32|
2197       __ Shll(dst.V2D(), tmp2.V2S(), 32);
2198 
2199       // Multiply the low components together, and accumulate with the high
2200       // half.
2201       //   dst = |dst[1] + cg|dst[0] + ae|
2202       __ Umlal(dst.V2D(), tmp3.V2S(), tmp1.V2S());
2203 
2204       break;
2205     }
2206       SIMD_BINOP_CASE(kArm64I64x2Eq, Cmeq, 2D);
2207     case kArm64I64x2ShrU: {
2208       ASSEMBLE_SIMD_SHIFT_RIGHT(Ushr, 6, V2D, Ushl, X);
2209       break;
2210     }
2211     case kArm64I32x4Splat: {
2212       __ Dup(i.OutputSimd128Register().V4S(), i.InputRegister32(0));
2213       break;
2214     }
2215     case kArm64I32x4ExtractLane: {
2216       __ Mov(i.OutputRegister32(), i.InputSimd128Register(0).V4S(),
2217              i.InputInt8(1));
2218       break;
2219     }
2220     case kArm64I32x4ReplaceLane: {
2221       VRegister dst = i.OutputSimd128Register().V4S(),
2222                 src1 = i.InputSimd128Register(0).V4S();
2223       if (dst != src1) {
2224         __ Mov(dst, src1);
2225       }
2226       __ Mov(dst, i.InputInt8(1), i.InputRegister32(2));
2227       break;
2228     }
2229       SIMD_UNOP_CASE(kArm64I32x4SConvertF32x4, Fcvtzs, 4S);
2230       SIMD_UNOP_CASE(kArm64I32x4Neg, Neg, 4S);
2231     case kArm64I32x4Shl: {
2232       ASSEMBLE_SIMD_SHIFT_LEFT(Shl, 5, V4S, Sshl, W);
2233       break;
2234     }
2235     case kArm64I32x4ShrS: {
2236       ASSEMBLE_SIMD_SHIFT_RIGHT(Sshr, 5, V4S, Sshl, W);
2237       break;
2238     }
2239       SIMD_BINOP_CASE(kArm64I32x4Add, Add, 4S);
2240       SIMD_BINOP_CASE(kArm64I32x4AddHoriz, Addp, 4S);
2241       SIMD_BINOP_CASE(kArm64I32x4Sub, Sub, 4S);
2242       SIMD_BINOP_CASE(kArm64I32x4Mul, Mul, 4S);
2243       SIMD_DESTRUCTIVE_BINOP_CASE(kArm64I32x4Mla, Mla, 4S);
2244       SIMD_DESTRUCTIVE_BINOP_CASE(kArm64I32x4Mls, Mls, 4S);
2245       SIMD_BINOP_CASE(kArm64I32x4MinS, Smin, 4S);
2246       SIMD_BINOP_CASE(kArm64I32x4MaxS, Smax, 4S);
2247       SIMD_BINOP_CASE(kArm64I32x4Eq, Cmeq, 4S);
2248     case kArm64I32x4Ne: {
2249       VRegister dst = i.OutputSimd128Register().V4S();
2250       __ Cmeq(dst, i.InputSimd128Register(0).V4S(),
2251               i.InputSimd128Register(1).V4S());
2252       __ Mvn(dst, dst);
2253       break;
2254     }
2255       SIMD_BINOP_CASE(kArm64I32x4GtS, Cmgt, 4S);
2256       SIMD_BINOP_CASE(kArm64I32x4GeS, Cmge, 4S);
2257       SIMD_UNOP_CASE(kArm64I32x4UConvertF32x4, Fcvtzu, 4S);
2258     case kArm64I32x4ShrU: {
2259       ASSEMBLE_SIMD_SHIFT_RIGHT(Ushr, 5, V4S, Ushl, W);
2260       break;
2261     }
2262       SIMD_BINOP_CASE(kArm64I32x4MinU, Umin, 4S);
2263       SIMD_BINOP_CASE(kArm64I32x4MaxU, Umax, 4S);
2264       SIMD_BINOP_CASE(kArm64I32x4GtU, Cmhi, 4S);
2265       SIMD_BINOP_CASE(kArm64I32x4GeU, Cmhs, 4S);
2266       SIMD_UNOP_CASE(kArm64I32x4Abs, Abs, 4S);
2267     case kArm64I32x4BitMask: {
2268       Register dst = i.OutputRegister32();
2269       VRegister src = i.InputSimd128Register(0);
2270       VRegister tmp = i.TempSimd128Register(0);
2271       VRegister mask = i.TempSimd128Register(1);
2272 
2273       __ Sshr(tmp.V4S(), src.V4S(), 31);
      // Set the i-th bit of each lane i. When ANDed with tmp, lanes whose
      // sign bit is set keep their i-th bit; all other lanes become 0.
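      // E.g. with lanes {-1, 5, 7, -9}, ANDing with the mask {1, 2, 4, 8}
      // leaves {1, 0, 0, 8} and Addv produces the bitmask 0b1001.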
2276       __ Movi(mask.V2D(), 0x0000'0008'0000'0004, 0x0000'0002'0000'0001);
2277       __ And(tmp.V16B(), mask.V16B(), tmp.V16B());
2278       __ Addv(tmp.S(), tmp.V4S());
2279       __ Mov(dst.W(), tmp.V4S(), 0);
2280       break;
2281     }
2282     case kArm64I32x4DotI16x8S: {
2283       UseScratchRegisterScope scope(tasm());
2284       VRegister lhs = i.InputSimd128Register(0);
2285       VRegister rhs = i.InputSimd128Register(1);
2286       VRegister tmp1 = scope.AcquireV(kFormat4S);
2287       VRegister tmp2 = scope.AcquireV(kFormat4S);
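      // Widen and multiply the low and high halves separately, then pairwise
      // add adjacent 32-bit products: each output lane is the dot product of
      // one pair of adjacent i16 lanes.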
2288       __ Smull(tmp1, lhs.V4H(), rhs.V4H());
2289       __ Smull2(tmp2, lhs.V8H(), rhs.V8H());
2290       __ Addp(i.OutputSimd128Register().V4S(), tmp1, tmp2);
2291       break;
2292     }
2293     case kArm64I16x8Splat: {
2294       __ Dup(i.OutputSimd128Register().V8H(), i.InputRegister32(0));
2295       break;
2296     }
2297     case kArm64I16x8ExtractLaneU: {
2298       __ Umov(i.OutputRegister32(), i.InputSimd128Register(0).V8H(),
2299               i.InputInt8(1));
2300       break;
2301     }
2302     case kArm64I16x8ExtractLaneS: {
2303       __ Smov(i.OutputRegister32(), i.InputSimd128Register(0).V8H(),
2304               i.InputInt8(1));
2305       break;
2306     }
2307     case kArm64I16x8ReplaceLane: {
2308       VRegister dst = i.OutputSimd128Register().V8H(),
2309                 src1 = i.InputSimd128Register(0).V8H();
2310       if (dst != src1) {
2311         __ Mov(dst, src1);
2312       }
2313       __ Mov(dst, i.InputInt8(1), i.InputRegister32(2));
2314       break;
2315     }
2316       SIMD_UNOP_CASE(kArm64I16x8Neg, Neg, 8H);
2317     case kArm64I16x8Shl: {
2318       ASSEMBLE_SIMD_SHIFT_LEFT(Shl, 4, V8H, Sshl, W);
2319       break;
2320     }
2321     case kArm64I16x8ShrS: {
2322       ASSEMBLE_SIMD_SHIFT_RIGHT(Sshr, 4, V8H, Sshl, W);
2323       break;
2324     }
2325     case kArm64I16x8SConvertI32x4: {
2326       VRegister dst = i.OutputSimd128Register(),
2327                 src0 = i.InputSimd128Register(0),
2328                 src1 = i.InputSimd128Register(1);
2329       UseScratchRegisterScope scope(tasm());
2330       VRegister temp = scope.AcquireV(kFormat4S);
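      // If dst aliases src1, the first Sqxtn below would overwrite src1
      // before Sqxtn2 reads it, so copy src1 to a temporary first.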
2331       if (dst == src1) {
2332         __ Mov(temp, src1.V4S());
2333         src1 = temp;
2334       }
2335       __ Sqxtn(dst.V4H(), src0.V4S());
2336       __ Sqxtn2(dst.V8H(), src1.V4S());
2337       break;
2338     }
2339       SIMD_BINOP_CASE(kArm64I16x8Add, Add, 8H);
2340       SIMD_BINOP_CASE(kArm64I16x8AddSatS, Sqadd, 8H);
2341       SIMD_BINOP_CASE(kArm64I16x8AddHoriz, Addp, 8H);
2342       SIMD_BINOP_CASE(kArm64I16x8Sub, Sub, 8H);
2343       SIMD_BINOP_CASE(kArm64I16x8SubSatS, Sqsub, 8H);
2344       SIMD_BINOP_CASE(kArm64I16x8Mul, Mul, 8H);
2345       SIMD_DESTRUCTIVE_BINOP_CASE(kArm64I16x8Mla, Mla, 8H);
2346       SIMD_DESTRUCTIVE_BINOP_CASE(kArm64I16x8Mls, Mls, 8H);
2347       SIMD_BINOP_CASE(kArm64I16x8MinS, Smin, 8H);
2348       SIMD_BINOP_CASE(kArm64I16x8MaxS, Smax, 8H);
2349       SIMD_BINOP_CASE(kArm64I16x8Eq, Cmeq, 8H);
2350     case kArm64I16x8Ne: {
2351       VRegister dst = i.OutputSimd128Register().V8H();
2352       __ Cmeq(dst, i.InputSimd128Register(0).V8H(),
2353               i.InputSimd128Register(1).V8H());
2354       __ Mvn(dst, dst);
2355       break;
2356     }
2357       SIMD_BINOP_CASE(kArm64I16x8GtS, Cmgt, 8H);
2358       SIMD_BINOP_CASE(kArm64I16x8GeS, Cmge, 8H);
2359     case kArm64I16x8ShrU: {
2360       ASSEMBLE_SIMD_SHIFT_RIGHT(Ushr, 4, V8H, Ushl, W);
2361       break;
2362     }
2363     case kArm64I16x8UConvertI32x4: {
2364       VRegister dst = i.OutputSimd128Register(),
2365                 src0 = i.InputSimd128Register(0),
2366                 src1 = i.InputSimd128Register(1);
2367       UseScratchRegisterScope scope(tasm());
2368       VRegister temp = scope.AcquireV(kFormat4S);
2369       if (dst == src1) {
2370         __ Mov(temp, src1.V4S());
2371         src1 = temp;
2372       }
2373       __ Sqxtun(dst.V4H(), src0.V4S());
2374       __ Sqxtun2(dst.V8H(), src1.V4S());
2375       break;
2376     }
2377       SIMD_BINOP_CASE(kArm64I16x8AddSatU, Uqadd, 8H);
2378       SIMD_BINOP_CASE(kArm64I16x8SubSatU, Uqsub, 8H);
2379       SIMD_BINOP_CASE(kArm64I16x8MinU, Umin, 8H);
2380       SIMD_BINOP_CASE(kArm64I16x8MaxU, Umax, 8H);
2381       SIMD_BINOP_CASE(kArm64I16x8GtU, Cmhi, 8H);
2382       SIMD_BINOP_CASE(kArm64I16x8GeU, Cmhs, 8H);
2383       SIMD_BINOP_CASE(kArm64I16x8RoundingAverageU, Urhadd, 8H);
2384       SIMD_BINOP_CASE(kArm64I16x8Q15MulRSatS, Sqrdmulh, 8H);
2385       SIMD_UNOP_CASE(kArm64I16x8Abs, Abs, 8H);
2386     case kArm64I16x8BitMask: {
2387       Register dst = i.OutputRegister32();
2388       VRegister src = i.InputSimd128Register(0);
2389       VRegister tmp = i.TempSimd128Register(0);
2390       VRegister mask = i.TempSimd128Register(1);
2391 
2392       __ Sshr(tmp.V8H(), src.V8H(), 15);
      // Set the i-th bit of each lane i. When ANDed with tmp, lanes whose
      // sign bit is set keep their i-th bit; all other lanes become 0.
2395       __ Movi(mask.V2D(), 0x0080'0040'0020'0010, 0x0008'0004'0002'0001);
2396       __ And(tmp.V16B(), mask.V16B(), tmp.V16B());
2397       __ Addv(tmp.H(), tmp.V8H());
2398       __ Mov(dst.W(), tmp.V8H(), 0);
2399       break;
2400     }
2401     case kArm64I8x16Splat: {
2402       __ Dup(i.OutputSimd128Register().V16B(), i.InputRegister32(0));
2403       break;
2404     }
2405     case kArm64I8x16ExtractLaneU: {
2406       __ Umov(i.OutputRegister32(), i.InputSimd128Register(0).V16B(),
2407               i.InputInt8(1));
2408       break;
2409     }
2410     case kArm64I8x16ExtractLaneS: {
2411       __ Smov(i.OutputRegister32(), i.InputSimd128Register(0).V16B(),
2412               i.InputInt8(1));
2413       break;
2414     }
2415     case kArm64I8x16ReplaceLane: {
2416       VRegister dst = i.OutputSimd128Register().V16B(),
2417                 src1 = i.InputSimd128Register(0).V16B();
2418       if (dst != src1) {
2419         __ Mov(dst, src1);
2420       }
2421       __ Mov(dst, i.InputInt8(1), i.InputRegister32(2));
2422       break;
2423     }
2424       SIMD_UNOP_CASE(kArm64I8x16Neg, Neg, 16B);
2425     case kArm64I8x16Shl: {
2426       ASSEMBLE_SIMD_SHIFT_LEFT(Shl, 3, V16B, Sshl, W);
2427       break;
2428     }
2429     case kArm64I8x16ShrS: {
2430       ASSEMBLE_SIMD_SHIFT_RIGHT(Sshr, 3, V16B, Sshl, W);
2431       break;
2432     }
2433     case kArm64I8x16SConvertI16x8: {
2434       VRegister dst = i.OutputSimd128Register(),
2435                 src0 = i.InputSimd128Register(0),
2436                 src1 = i.InputSimd128Register(1);
2437       UseScratchRegisterScope scope(tasm());
2438       VRegister temp = scope.AcquireV(kFormat8H);
2439       if (dst == src1) {
2440         __ Mov(temp, src1.V8H());
2441         src1 = temp;
2442       }
2443       __ Sqxtn(dst.V8B(), src0.V8H());
2444       __ Sqxtn2(dst.V16B(), src1.V8H());
2445       break;
2446     }
2447       SIMD_BINOP_CASE(kArm64I8x16Add, Add, 16B);
2448       SIMD_BINOP_CASE(kArm64I8x16AddSatS, Sqadd, 16B);
2449       SIMD_BINOP_CASE(kArm64I8x16Sub, Sub, 16B);
2450       SIMD_BINOP_CASE(kArm64I8x16SubSatS, Sqsub, 16B);
2451       SIMD_BINOP_CASE(kArm64I8x16Mul, Mul, 16B);
2452       SIMD_DESTRUCTIVE_BINOP_CASE(kArm64I8x16Mla, Mla, 16B);
2453       SIMD_DESTRUCTIVE_BINOP_CASE(kArm64I8x16Mls, Mls, 16B);
2454       SIMD_BINOP_CASE(kArm64I8x16MinS, Smin, 16B);
2455       SIMD_BINOP_CASE(kArm64I8x16MaxS, Smax, 16B);
2456       SIMD_BINOP_CASE(kArm64I8x16Eq, Cmeq, 16B);
2457     case kArm64I8x16Ne: {
2458       VRegister dst = i.OutputSimd128Register().V16B();
2459       __ Cmeq(dst, i.InputSimd128Register(0).V16B(),
2460               i.InputSimd128Register(1).V16B());
2461       __ Mvn(dst, dst);
2462       break;
2463     }
2464       SIMD_BINOP_CASE(kArm64I8x16GtS, Cmgt, 16B);
2465       SIMD_BINOP_CASE(kArm64I8x16GeS, Cmge, 16B);
2466     case kArm64I8x16ShrU: {
2467       ASSEMBLE_SIMD_SHIFT_RIGHT(Ushr, 3, V16B, Ushl, W);
2468       break;
2469     }
2470     case kArm64I8x16UConvertI16x8: {
2471       VRegister dst = i.OutputSimd128Register(),
2472                 src0 = i.InputSimd128Register(0),
2473                 src1 = i.InputSimd128Register(1);
2474       UseScratchRegisterScope scope(tasm());
2475       VRegister temp = scope.AcquireV(kFormat8H);
2476       if (dst == src1) {
2477         __ Mov(temp, src1.V8H());
2478         src1 = temp;
2479       }
2480       __ Sqxtun(dst.V8B(), src0.V8H());
2481       __ Sqxtun2(dst.V16B(), src1.V8H());
2482       break;
2483     }
2484       SIMD_BINOP_CASE(kArm64I8x16AddSatU, Uqadd, 16B);
2485       SIMD_BINOP_CASE(kArm64I8x16SubSatU, Uqsub, 16B);
2486       SIMD_BINOP_CASE(kArm64I8x16MinU, Umin, 16B);
2487       SIMD_BINOP_CASE(kArm64I8x16MaxU, Umax, 16B);
2488       SIMD_BINOP_CASE(kArm64I8x16GtU, Cmhi, 16B);
2489       SIMD_BINOP_CASE(kArm64I8x16GeU, Cmhs, 16B);
2490       SIMD_BINOP_CASE(kArm64I8x16RoundingAverageU, Urhadd, 16B);
2491       SIMD_UNOP_CASE(kArm64I8x16Abs, Abs, 16B);
2492     case kArm64I8x16BitMask: {
2493       Register dst = i.OutputRegister32();
2494       VRegister src = i.InputSimd128Register(0);
2495       VRegister tmp = i.TempSimd128Register(0);
2496       VRegister mask = i.TempSimd128Register(1);
2497 
      // Set the i-th bit of each lane i. When ANDed with tmp, lanes whose
      // sign bit is set keep their i-th bit; all other lanes become 0.
2500       __ Sshr(tmp.V16B(), src.V16B(), 7);
2501       __ Movi(mask.V2D(), 0x8040'2010'0804'0201);
2502       __ And(tmp.V16B(), mask.V16B(), tmp.V16B());
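      // Combine the two 8-byte halves: Ext rotates the upper half down, Zip1
      // interleaves the halves so each halfword holds (low byte | high byte
      // << 8), and Addv over eight halfwords sums them into the 16-bit mask.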
2503       __ Ext(mask.V16B(), tmp.V16B(), tmp.V16B(), 8);
2504       __ Zip1(tmp.V16B(), tmp.V16B(), mask.V16B());
2505       __ Addv(tmp.H(), tmp.V8H());
2506       __ Mov(dst.W(), tmp.V8H(), 0);
2507       break;
2508     }
2509     case kArm64S128Const: {
2510       uint64_t imm1 = make_uint64(i.InputUint32(1), i.InputUint32(0));
2511       uint64_t imm2 = make_uint64(i.InputUint32(3), i.InputUint32(2));
2512       __ Movi(i.OutputSimd128Register().V16B(), imm2, imm1);
2513       break;
2514     }
2515     case kArm64S128Zero: {
2516       VRegister dst = i.OutputSimd128Register().V16B();
2517       __ Eor(dst, dst, dst);
2518       break;
2519     }
2520       SIMD_BINOP_CASE(kArm64S128And, And, 16B);
2521       SIMD_BINOP_CASE(kArm64S128Or, Orr, 16B);
2522       SIMD_BINOP_CASE(kArm64S128Xor, Eor, 16B);
2523       SIMD_UNOP_CASE(kArm64S128Not, Mvn, 16B);
2524     case kArm64S128Dup: {
2525       VRegister dst = i.OutputSimd128Register(),
2526                 src = i.InputSimd128Register(0);
2527       int lanes = i.InputInt32(1);
2528       int index = i.InputInt32(2);
2529       switch (lanes) {
2530         case 4:
2531           __ Dup(dst.V4S(), src.V4S(), index);
2532           break;
2533         case 8:
2534           __ Dup(dst.V8H(), src.V8H(), index);
2535           break;
2536         case 16:
2537           __ Dup(dst.V16B(), src.V16B(), index);
2538           break;
2539         default:
2540           UNREACHABLE();
2541           break;
2542       }
2543       break;
2544     }
2545       SIMD_DESTRUCTIVE_BINOP_CASE(kArm64S128Select, Bsl, 16B);
2546       SIMD_BINOP_CASE(kArm64S128AndNot, Bic, 16B);
2547     case kArm64S32x4Shuffle: {
2548       Simd128Register dst = i.OutputSimd128Register().V4S(),
2549                       src0 = i.InputSimd128Register(0).V4S(),
2550                       src1 = i.InputSimd128Register(1).V4S();
2551       // Check for in-place shuffles.
2552       // If dst == src0 == src1, then the shuffle is unary and we only use src0.
2553       UseScratchRegisterScope scope(tasm());
2554       VRegister temp = scope.AcquireV(kFormat4S);
2555       if (dst == src0) {
2556         __ Mov(temp, src0);
2557         src0 = temp;
2558       } else if (dst == src1) {
2559         __ Mov(temp, src1);
2560         src1 = temp;
2561       }
2562       // Perform shuffle as a vmov per lane.
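      // Lane selectors are packed one byte each into the immediate: values
      // 0-3 pick a lane from src0, values 4-7 pick a lane from src1.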
2563       int32_t shuffle = i.InputInt32(2);
2564       for (int i = 0; i < 4; i++) {
2565         VRegister src = src0;
2566         int lane = shuffle & 0x7;
2567         if (lane >= 4) {
2568           src = src1;
2569           lane &= 0x3;
2570         }
2571         __ Mov(dst, i, src, lane);
2572         shuffle >>= 8;
2573       }
2574       break;
2575     }
2576       SIMD_BINOP_CASE(kArm64S32x4ZipLeft, Zip1, 4S);
2577       SIMD_BINOP_CASE(kArm64S32x4ZipRight, Zip2, 4S);
2578       SIMD_BINOP_CASE(kArm64S32x4UnzipLeft, Uzp1, 4S);
2579       SIMD_BINOP_CASE(kArm64S32x4UnzipRight, Uzp2, 4S);
2580       SIMD_BINOP_CASE(kArm64S32x4TransposeLeft, Trn1, 4S);
2581       SIMD_BINOP_CASE(kArm64S32x4TransposeRight, Trn2, 4S);
2582       SIMD_BINOP_CASE(kArm64S16x8ZipLeft, Zip1, 8H);
2583       SIMD_BINOP_CASE(kArm64S16x8ZipRight, Zip2, 8H);
2584       SIMD_BINOP_CASE(kArm64S16x8UnzipLeft, Uzp1, 8H);
2585       SIMD_BINOP_CASE(kArm64S16x8UnzipRight, Uzp2, 8H);
2586       SIMD_BINOP_CASE(kArm64S16x8TransposeLeft, Trn1, 8H);
2587       SIMD_BINOP_CASE(kArm64S16x8TransposeRight, Trn2, 8H);
2588       SIMD_BINOP_CASE(kArm64S8x16ZipLeft, Zip1, 16B);
2589       SIMD_BINOP_CASE(kArm64S8x16ZipRight, Zip2, 16B);
2590       SIMD_BINOP_CASE(kArm64S8x16UnzipLeft, Uzp1, 16B);
2591       SIMD_BINOP_CASE(kArm64S8x16UnzipRight, Uzp2, 16B);
2592       SIMD_BINOP_CASE(kArm64S8x16TransposeLeft, Trn1, 16B);
2593       SIMD_BINOP_CASE(kArm64S8x16TransposeRight, Trn2, 16B);
2594     case kArm64S8x16Concat: {
2595       __ Ext(i.OutputSimd128Register().V16B(), i.InputSimd128Register(0).V16B(),
2596              i.InputSimd128Register(1).V16B(), i.InputInt4(2));
2597       break;
2598     }
2599     case kArm64I8x16Swizzle: {
2600       __ Tbl(i.OutputSimd128Register().V16B(), i.InputSimd128Register(0).V16B(),
2601              i.InputSimd128Register(1).V16B());
2602       break;
2603     }
2604     case kArm64I8x16Shuffle: {
2605       Simd128Register dst = i.OutputSimd128Register().V16B(),
2606                       src0 = i.InputSimd128Register(0).V16B(),
2607                       src1 = i.InputSimd128Register(1).V16B();
2608       // The unary shuffle table is in src0; the binary shuffle table is in
2609       // src0 and src1, which must be consecutive registers.
2610       if (src0 != src1) {
2611         DCHECK(AreConsecutive(src0, src1));
2612       }
2613 
2614       int64_t imm1 = make_uint64(i.InputInt32(3), i.InputInt32(2));
2615       int64_t imm2 = make_uint64(i.InputInt32(5), i.InputInt32(4));
2616       DCHECK_EQ(0, (imm1 | imm2) & (src0 == src1 ? 0xF0F0F0F0F0F0F0F0
2617                                                  : 0xE0E0E0E0E0E0E0E0));
2618 
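           // Materialize the 16 lane indices in a temporary vector and use it
           // as the index operand of Tbl, with src0 (and src1) as the table.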
2619       UseScratchRegisterScope scope(tasm());
2620       VRegister temp = scope.AcquireV(kFormat16B);
2621       __ Movi(temp, imm2, imm1);
2622 
2623       if (src0 == src1) {
2624         __ Tbl(dst, src0, temp.V16B());
2625       } else {
2626         __ Tbl(dst, src0, src1, temp.V16B());
2627       }
2628       break;
2629     }
2630       SIMD_UNOP_CASE(kArm64S32x2Reverse, Rev64, 4S);
2631       SIMD_UNOP_CASE(kArm64S16x4Reverse, Rev64, 8H);
2632       SIMD_UNOP_CASE(kArm64S16x2Reverse, Rev32, 8H);
2633       SIMD_UNOP_CASE(kArm64S8x8Reverse, Rev64, 16B);
2634       SIMD_UNOP_CASE(kArm64S8x4Reverse, Rev32, 16B);
2635       SIMD_UNOP_CASE(kArm64S8x2Reverse, Rev16, 16B);
2636     case kArm64LoadSplat: {
2637       VectorFormat f = VectorFormatFillQ(MiscField::decode(opcode));
2638       __ ld1r(i.OutputSimd128Register().Format(f), i.MemoryOperand(0));
2639       break;
2640     }
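         // The load-extend cases below load half a vector and then sign- or
         // zero-extend each lane to twice its width.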
2641     case kArm64S128Load8x8S: {
2642       __ Ldr(i.OutputSimd128Register().V8B(), i.MemoryOperand(0));
2643       __ Sxtl(i.OutputSimd128Register().V8H(), i.OutputSimd128Register().V8B());
2644       break;
2645     }
2646     case kArm64S128Load8x8U: {
2647       __ Ldr(i.OutputSimd128Register().V8B(), i.MemoryOperand(0));
2648       __ Uxtl(i.OutputSimd128Register().V8H(), i.OutputSimd128Register().V8B());
2649       break;
2650     }
2651     case kArm64S128Load16x4S: {
2652       __ Ldr(i.OutputSimd128Register().V4H(), i.MemoryOperand(0));
2653       __ Sxtl(i.OutputSimd128Register().V4S(), i.OutputSimd128Register().V4H());
2654       break;
2655     }
2656     case kArm64S128Load16x4U: {
2657       __ Ldr(i.OutputSimd128Register().V4H(), i.MemoryOperand(0));
2658       __ Uxtl(i.OutputSimd128Register().V4S(), i.OutputSimd128Register().V4H());
2659       break;
2660     }
2661     case kArm64S128Load32x2S: {
2662       __ Ldr(i.OutputSimd128Register().V2S(), i.MemoryOperand(0));
2663       __ Sxtl(i.OutputSimd128Register().V2D(), i.OutputSimd128Register().V2S());
2664       break;
2665     }
2666     case kArm64S128Load32x2U: {
2667       __ Ldr(i.OutputSimd128Register().V2S(), i.MemoryOperand(0));
2668       __ Uxtl(i.OutputSimd128Register().V2D(), i.OutputSimd128Register().V2S());
2669       break;
2670     }
2671     case kArm64S128Load32Zero: {
2672       __ Ldr(i.OutputSimd128Register().S(), i.MemoryOperand(0));
2673       break;
2674     }
2675     case kArm64S128Load64Zero: {
2676       __ Ldr(i.OutputSimd128Register().D(), i.MemoryOperand(0));
2677       break;
2678     }
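     // Reduce the input vector to a scalar with the given horizontal
     // reduction, then set the 32-bit output register to 1 if the result is
     // non-zero.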
2679 #define SIMD_REDUCE_OP_CASE(Op, Instr, format, FORMAT)     \
2680   case Op: {                                               \
2681     UseScratchRegisterScope scope(tasm());                 \
2682     VRegister temp = scope.AcquireV(format);               \
2683     __ Instr(temp, i.InputSimd128Register(0).V##FORMAT()); \
2684     __ Umov(i.OutputRegister32(), temp, 0);                \
2685     __ Cmp(i.OutputRegister32(), 0);                       \
2686     __ Cset(i.OutputRegister32(), ne);                     \
2687     break;                                                 \
2688   }
2689       // For AnyTrue, the format does not matter.
2690       SIMD_REDUCE_OP_CASE(kArm64V128AnyTrue, Umaxv, kFormatS, 4S);
2691       SIMD_REDUCE_OP_CASE(kArm64V32x4AllTrue, Uminv, kFormatS, 4S);
2692       SIMD_REDUCE_OP_CASE(kArm64V16x8AllTrue, Uminv, kFormatH, 8H);
2693       SIMD_REDUCE_OP_CASE(kArm64V8x16AllTrue, Uminv, kFormatB, 16B);
2694   }
2695   return kSuccess;
2696 }  // NOLINT(readability/fn_size)
2697 
2698 #undef SIMD_UNOP_CASE
2699 #undef SIMD_BINOP_CASE
2700 #undef SIMD_DESTRUCTIVE_BINOP_CASE
2701 #undef SIMD_REDUCE_OP_CASE
2702 #undef ASSEMBLE_SIMD_SHIFT_LEFT
2703 #undef ASSEMBLE_SIMD_SHIFT_RIGHT
2704 
2705 // Assemble branches after this instruction.
2706 void CodeGenerator::AssembleArchBranch(Instruction* instr, BranchInfo* branch) {
2707   Arm64OperandConverter i(this, instr);
2708   Label* tlabel = branch->true_label;
2709   Label* flabel = branch->false_label;
2710   FlagsCondition condition = branch->condition;
2711   ArchOpcode opcode = instr->arch_opcode();
2712 
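       // The compare-and-branch and test-and-branch opcodes fold the
       // flag-setting instruction into the branch itself (CBZ/CBNZ and
       // TBZ/TBNZ), so they are handled separately from the generic
       // conditional branch at the end.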
2713   if (opcode == kArm64CompareAndBranch32) {
2714     DCHECK(FlagsModeField::decode(instr->opcode()) != kFlags_branch_and_poison);
2715     switch (condition) {
2716       case kEqual:
2717         __ Cbz(i.InputRegister32(0), tlabel);
2718         break;
2719       case kNotEqual:
2720         __ Cbnz(i.InputRegister32(0), tlabel);
2721         break;
2722       default:
2723         UNREACHABLE();
2724     }
2725   } else if (opcode == kArm64CompareAndBranch) {
2726     DCHECK(FlagsModeField::decode(instr->opcode()) != kFlags_branch_and_poison);
2727     switch (condition) {
2728       case kEqual:
2729         __ Cbz(i.InputRegister64(0), tlabel);
2730         break;
2731       case kNotEqual:
2732         __ Cbnz(i.InputRegister64(0), tlabel);
2733         break;
2734       default:
2735         UNREACHABLE();
2736     }
2737   } else if (opcode == kArm64TestAndBranch32) {
2738     DCHECK(FlagsModeField::decode(instr->opcode()) != kFlags_branch_and_poison);
2739     switch (condition) {
2740       case kEqual:
2741         __ Tbz(i.InputRegister32(0), i.InputInt5(1), tlabel);
2742         break;
2743       case kNotEqual:
2744         __ Tbnz(i.InputRegister32(0), i.InputInt5(1), tlabel);
2745         break;
2746       default:
2747         UNREACHABLE();
2748     }
2749   } else if (opcode == kArm64TestAndBranch) {
2750     DCHECK(FlagsModeField::decode(instr->opcode()) != kFlags_branch_and_poison);
2751     switch (condition) {
2752       case kEqual:
2753         __ Tbz(i.InputRegister64(0), i.InputInt6(1), tlabel);
2754         break;
2755       case kNotEqual:
2756         __ Tbnz(i.InputRegister64(0), i.InputInt6(1), tlabel);
2757         break;
2758       default:
2759         UNREACHABLE();
2760     }
2761   } else {
2762     Condition cc = FlagsConditionToCondition(condition);
2763     __ B(cc, tlabel);
2764   }
2765   if (!branch->fallthru) __ B(flabel);  // no fallthru to flabel.
2766 }
2767 
2768 void CodeGenerator::AssembleBranchPoisoning(FlagsCondition condition,
2769                                             Instruction* instr) {
2770   // TODO(jarin) Handle float comparisons (kUnordered[Not]Equal).
2771   if (condition == kUnorderedEqual || condition == kUnorderedNotEqual) {
2772     return;
2773   }
2774 
2775   condition = NegateFlagsCondition(condition);
2776   __ CmovX(kSpeculationPoisonRegister, xzr,
2777            FlagsConditionToCondition(condition));
2778   __ Csdb();
2779 }
2780 
2781 void CodeGenerator::AssembleArchDeoptBranch(Instruction* instr,
2782                                             BranchInfo* branch) {
2783   AssembleArchBranch(instr, branch);
2784 }
2785 
2786 void CodeGenerator::AssembleArchJump(RpoNumber target) {
2787   if (!IsNextInAssemblyOrder(target)) __ B(GetLabel(target));
2788 }
2789 
2790 void CodeGenerator::AssembleArchTrap(Instruction* instr,
2791                                      FlagsCondition condition) {
2792   class OutOfLineTrap final : public OutOfLineCode {
2793    public:
2794     OutOfLineTrap(CodeGenerator* gen, Instruction* instr)
2795         : OutOfLineCode(gen), instr_(instr), gen_(gen) {}
2796     void Generate() final {
2797       Arm64OperandConverter i(gen_, instr_);
2798       TrapId trap_id =
2799           static_cast<TrapId>(i.InputInt32(instr_->InputCount() - 1));
2800       GenerateCallToTrap(trap_id);
2801     }
2802 
2803    private:
2804     void GenerateCallToTrap(TrapId trap_id) {
2805       if (trap_id == TrapId::kInvalid) {
2806         // We cannot test calls to the runtime in cctest/test-run-wasm.
2807         // Therefore we emit a call to C here instead of a call to the runtime.
2808         __ CallCFunction(
2809             ExternalReference::wasm_call_trap_callback_for_testing(), 0);
2810         __ LeaveFrame(StackFrame::WASM);
2811         auto call_descriptor = gen_->linkage()->GetIncomingDescriptor();
2812         int pop_count =
2813             static_cast<int>(call_descriptor->StackParameterCount());
2814         pop_count += (pop_count & 1);  // align
2815         __ Drop(pop_count);
2816         __ Ret();
2817       } else {
2818         gen_->AssembleSourcePosition(instr_);
2819         // A direct call to a wasm runtime stub defined in this module.
2820         // Just encode the stub index. This will be patched when the code
2821         // is added to the native module and copied into wasm code space.
2822         __ Call(static_cast<Address>(trap_id), RelocInfo::WASM_STUB_CALL);
2823         ReferenceMap* reference_map =
2824             gen_->zone()->New<ReferenceMap>(gen_->zone());
2825         gen_->RecordSafepoint(reference_map, Safepoint::kNoLazyDeopt);
2826         if (FLAG_debug_code) {
2827           // The trap code should never return.
2828           __ Brk(0);
2829         }
2830       }
2831     }
2832     Instruction* instr_;
2833     CodeGenerator* gen_;
2834   };
2835   auto ool = zone()->New<OutOfLineTrap>(this, instr);
2836   Label* tlabel = ool->entry();
2837   Condition cc = FlagsConditionToCondition(condition);
2838   __ B(cc, tlabel);
2839 }
2840 
2841 // Assemble boolean materializations after this instruction.
2842 void CodeGenerator::AssembleArchBoolean(Instruction* instr,
2843                                         FlagsCondition condition) {
2844   Arm64OperandConverter i(this, instr);
2845 
2846   // Materialize a full 64-bit 1 or 0 value. The result register is always the
2847   // last output of the instruction.
2848   DCHECK_NE(0u, instr->OutputCount());
2849   Register reg = i.OutputRegister(instr->OutputCount() - 1);
2850   Condition cc = FlagsConditionToCondition(condition);
2851   __ Cset(reg, cc);
2852 }
2853 
2854 void CodeGenerator::AssembleArchBinarySearchSwitch(Instruction* instr) {
2855   Arm64OperandConverter i(this, instr);
2856   Register input = i.InputRegister32(0);
2857   std::vector<std::pair<int32_t, Label*>> cases;
2858   for (size_t index = 2; index < instr->InputCount(); index += 2) {
2859     cases.push_back({i.InputInt32(index + 0), GetLabel(i.InputRpo(index + 1))});
2860   }
2861   AssembleArchBinarySearchSwitchRange(input, i.InputRpo(1), cases.data(),
2862                                       cases.data() + cases.size());
2863 }
2864 
2865 void CodeGenerator::AssembleArchTableSwitch(Instruction* instr) {
2866   Arm64OperandConverter i(this, instr);
2867   UseScratchRegisterScope scope(tasm());
2868   Register input = i.InputRegister32(0);
2869   Register temp = scope.AcquireX();
2870   size_t const case_count = instr->InputCount() - 2;
2871   Label table;
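       // Branch to the default target if the input is out of range; otherwise
       // index into the inline jump table, where each entry is a single B
       // instruction (preceded by a BTI landing pad when control-flow
       // integrity is enabled).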
2872   __ Cmp(input, case_count);
2873   __ B(hs, GetLabel(i.InputRpo(1)));
2874   __ Adr(temp, &table);
2875   int entry_size_log2 = 2;
2876 #ifdef V8_ENABLE_CONTROL_FLOW_INTEGRITY
2877   ++entry_size_log2;  // Account for BTI.
2878 #endif
2879   __ Add(temp, temp, Operand(input, UXTW, entry_size_log2));
2880   __ Br(temp);
2881   {
2882     TurboAssembler::BlockPoolsScope block_pools(tasm(),
2883                                                 case_count * kInstrSize);
2884     __ Bind(&table);
2885     for (size_t index = 0; index < case_count; ++index) {
2886       __ JumpTarget();
2887       __ B(GetLabel(i.InputRpo(index + 2)));
2888     }
2889     __ JumpTarget();
2890   }
2891 }
2892 
2893 void CodeGenerator::FinishFrame(Frame* frame) {
2894   frame->AlignFrame(16);
2895   auto call_descriptor = linkage()->GetIncomingDescriptor();
2896 
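       // The callee-saved register sets contain an even number of registers,
       // so the slots reserved for them keep the frame 16-byte aligned.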
2897   // Save FP registers.
2898   CPURegList saves_fp = CPURegList(CPURegister::kVRegister, kDRegSizeInBits,
2899                                    call_descriptor->CalleeSavedFPRegisters());
2900   int saved_count = saves_fp.Count();
2901   if (saved_count != 0) {
2902     DCHECK(saves_fp.list() == CPURegList::GetCalleeSavedV().list());
2903     DCHECK_EQ(saved_count % 2, 0);
2904     frame->AllocateSavedCalleeRegisterSlots(saved_count *
2905                                             (kDoubleSize / kSystemPointerSize));
2906   }
2907 
2908   CPURegList saves = CPURegList(CPURegister::kRegister, kXRegSizeInBits,
2909                                 call_descriptor->CalleeSavedRegisters());
2910   saved_count = saves.Count();
2911   if (saved_count != 0) {
2912     DCHECK_EQ(saved_count % 2, 0);
2913     frame->AllocateSavedCalleeRegisterSlots(saved_count);
2914   }
2915 }
2916 
2917 void CodeGenerator::AssembleConstructFrame() {
2918   auto call_descriptor = linkage()->GetIncomingDescriptor();
2919   __ AssertSpAligned();
2920 
2921   // The frame has been previously padded in CodeGenerator::FinishFrame().
2922   DCHECK_EQ(frame()->GetTotalFrameSlotCount() % 2, 0);
2923   int required_slots =
2924       frame()->GetTotalFrameSlotCount() - frame()->GetFixedSlotCount();
2925 
2926   CPURegList saves = CPURegList(CPURegister::kRegister, kXRegSizeInBits,
2927                                 call_descriptor->CalleeSavedRegisters());
2928   DCHECK_EQ(saves.Count() % 2, 0);
2929   CPURegList saves_fp = CPURegList(CPURegister::kVRegister, kDRegSizeInBits,
2930                                    call_descriptor->CalleeSavedFPRegisters());
2931   DCHECK_EQ(saves_fp.Count() % 2, 0);
2932   // The number of slots for returns has to be even to ensure the correct stack
2933   // alignment.
2934   const int returns = RoundUp(frame()->GetReturnSlotCount(), 2);
2935 
2936   if (frame_access_state()->has_frame()) {
2937     // Link the frame.
2938     if (call_descriptor->IsJSFunctionCall()) {
2939       STATIC_ASSERT(InterpreterFrameConstants::kFixedFrameSize % 16 == 8);
2940       DCHECK_EQ(required_slots % 2, 1);
2941       __ Prologue();
2942       // Update required_slots count since we have just claimed one extra slot.
2943       STATIC_ASSERT(TurboAssembler::kExtraSlotClaimedByPrologue == 1);
2944       required_slots -= TurboAssembler::kExtraSlotClaimedByPrologue;
2945     } else {
2946       __ Push<TurboAssembler::kSignLR>(lr, fp);
2947       __ Mov(fp, sp);
2948     }
2949     unwinding_info_writer_.MarkFrameConstructed(__ pc_offset());
2950 
2951     // Create an OSR entry if applicable.
2952     if (info()->is_osr()) {
2953       // TurboFan OSR-compiled functions cannot be entered directly.
2954       __ Abort(AbortReason::kShouldNotDirectlyEnterOsrFunction);
2955 
2956       // Unoptimized code jumps directly to this entrypoint while the
2957       // unoptimized frame is still on the stack. Optimized code uses OSR values
2958       // directly from the unoptimized frame. Thus, all that needs to be done is
2959       // to allocate the remaining stack slots.
2960       if (FLAG_code_comments) __ RecordComment("-- OSR entrypoint --");
2961       osr_pc_offset_ = __ pc_offset();
2962       size_t unoptimized_frame_slots = osr_helper()->UnoptimizedFrameSlots();
2963       DCHECK(call_descriptor->IsJSFunctionCall());
2964       DCHECK_EQ(unoptimized_frame_slots % 2, 1);
2965       // One unoptimized frame slot has already been claimed when the actual
2966       // arguments count was pushed.
2967       required_slots -=
2968           unoptimized_frame_slots - TurboAssembler::kExtraSlotClaimedByPrologue;
2969       ResetSpeculationPoison();
2970     }
2971 
2972     if (info()->IsWasm() && required_slots > 128) {
2973       // For WebAssembly functions with big frames we have to do the stack
2974       // overflow check before we construct the frame. Otherwise we may not
2975       // have enough space on the stack to call the runtime for the stack
2976       // overflow.
2977       Label done;
2978       // If the frame is bigger than the stack, we throw the stack overflow
2979       // exception unconditionally. Thereby we can avoid the integer overflow
2980       // check in the condition code.
2981       if (required_slots * kSystemPointerSize < FLAG_stack_size * 1024) {
2982         UseScratchRegisterScope scope(tasm());
2983         Register scratch = scope.AcquireX();
2984         __ Ldr(scratch, FieldMemOperand(
2985                             kWasmInstanceRegister,
2986                             WasmInstanceObject::kRealStackLimitAddressOffset));
2987         __ Ldr(scratch, MemOperand(scratch));
2988         __ Add(scratch, scratch, required_slots * kSystemPointerSize);
2989         __ Cmp(sp, scratch);
2990         __ B(hs, &done);
2991       }
2992 
2993       {
2994         // Finish the frame that hasn't been fully built yet.
2995         UseScratchRegisterScope temps(tasm());
2996         Register scratch = temps.AcquireX();
2997         __ Mov(scratch,
2998                StackFrame::TypeToMarker(info()->GetOutputStackFrameType()));
2999         __ Push(scratch, kWasmInstanceRegister);
3000       }
3001 
3002       __ Call(wasm::WasmCode::kWasmStackOverflow, RelocInfo::WASM_STUB_CALL);
3003       // We come from WebAssembly; there are no references for the GC.
3004       ReferenceMap* reference_map = zone()->New<ReferenceMap>(zone());
3005       RecordSafepoint(reference_map, Safepoint::kNoLazyDeopt);
3006       if (FLAG_debug_code) {
3007         __ Brk(0);
3008       }
3009       __ Bind(&done);
3010     }
3011 
3012     // Skip callee-saved slots, which are pushed below.
3013     required_slots -= saves.Count();
3014     required_slots -= saves_fp.Count();
3015     required_slots -= returns;
3016 
3017     // Build the remainder of the frame, accounting for and filling in the
3018     // frame-specific header information, i.e. claiming the extra slot that
3019     // other platforms explicitly push for STUB (code object) frames and for
3020     // frames that record their argument count.
3021     switch (call_descriptor->kind()) {
3022       case CallDescriptor::kCallJSFunction:
3023         __ Claim(required_slots);
3024         break;
3025       case CallDescriptor::kCallCodeObject: {
3026         UseScratchRegisterScope temps(tasm());
3027         Register scratch = temps.AcquireX();
3028         __ Mov(scratch,
3029                StackFrame::TypeToMarker(info()->GetOutputStackFrameType()));
3030         __ Push(scratch, padreg);
3031         // One of the extra slots has just been claimed when pushing the frame
3032         // type marker above. We also know that we have at least one slot to
3033         // claim here, as the typed frame has an odd number of fixed slots, and
3034         // all other parts of the total frame slots are even, leaving
3035         // {required_slots} to be odd.
3036         DCHECK_GE(required_slots, 1);
3037         __ Claim(required_slots - 1);
3038       } break;
3039       case CallDescriptor::kCallWasmFunction: {
3040         UseScratchRegisterScope temps(tasm());
3041         Register scratch = temps.AcquireX();
3042         __ Mov(scratch,
3043                StackFrame::TypeToMarker(info()->GetOutputStackFrameType()));
3044         __ Push(scratch, kWasmInstanceRegister);
3045         __ Claim(required_slots);
3046       } break;
3047       case CallDescriptor::kCallWasmImportWrapper:
3048       case CallDescriptor::kCallWasmCapiFunction: {
3049         UseScratchRegisterScope temps(tasm());
3050         __ LoadTaggedPointerField(
3051             kJSFunctionRegister,
3052             FieldMemOperand(kWasmInstanceRegister, Tuple2::kValue2Offset));
3053         __ LoadTaggedPointerField(
3054             kWasmInstanceRegister,
3055             FieldMemOperand(kWasmInstanceRegister, Tuple2::kValue1Offset));
3056         Register scratch = temps.AcquireX();
3057         __ Mov(scratch,
3058                StackFrame::TypeToMarker(info()->GetOutputStackFrameType()));
3059         __ Push(scratch, kWasmInstanceRegister);
3060         int extra_slots =
3061             call_descriptor->kind() == CallDescriptor::kCallWasmImportWrapper
3062                 ? 0   // Import wrapper: none.
3063                 : 1;  // C-API function: PC.
3064         __ Claim(required_slots + extra_slots);
3065       } break;
3066       case CallDescriptor::kCallAddress:
3067         if (info()->GetOutputStackFrameType() == StackFrame::C_WASM_ENTRY) {
3068           UseScratchRegisterScope temps(tasm());
3069           Register scratch = temps.AcquireX();
3070           __ Mov(scratch, StackFrame::TypeToMarker(StackFrame::C_WASM_ENTRY));
3071           __ Push(scratch, padreg);
3072           // The additional slot will be used for the saved c_entry_fp.
3073         }
3074         __ Claim(required_slots);
3075         break;
3076       default:
3077         UNREACHABLE();
3078     }
3079   }
3080 
3081   // Save FP registers.
3082   DCHECK_IMPLIES(saves_fp.Count() != 0,
3083                  saves_fp.list() == CPURegList::GetCalleeSavedV().list());
3084   __ PushCPURegList(saves_fp);
3085 
3086   // Save registers.
3087   DCHECK_IMPLIES(!saves.IsEmpty(),
3088                  saves.list() == CPURegList::GetCalleeSaved().list());
3089   __ PushCPURegList<TurboAssembler::kSignLR>(saves);
3090 
3091   if (returns != 0) {
3092     __ Claim(returns);
3093   }
3094 }
3095 
3096 void CodeGenerator::AssembleReturn(InstructionOperand* additional_pop_count) {
3097   auto call_descriptor = linkage()->GetIncomingDescriptor();
3098 
3099   const int returns = RoundUp(frame()->GetReturnSlotCount(), 2);
3100   if (returns != 0) {
3101     __ Drop(returns);
3102   }
3103 
3104   // Restore registers.
3105   CPURegList saves = CPURegList(CPURegister::kRegister, kXRegSizeInBits,
3106                                 call_descriptor->CalleeSavedRegisters());
3107   __ PopCPURegList<TurboAssembler::kAuthLR>(saves);
3108 
3109   // Restore fp registers.
3110   CPURegList saves_fp = CPURegList(CPURegister::kVRegister, kDRegSizeInBits,
3111                                    call_descriptor->CalleeSavedFPRegisters());
3112   __ PopCPURegList(saves_fp);
3113 
3114   unwinding_info_writer_.MarkBlockWillExit();
3115 
3116   // We might need x3 for scratch.
3117   DCHECK_EQ(0u, call_descriptor->CalleeSavedRegisters() & x3.bit());
3118   const int parameter_count =
3119       static_cast<int>(call_descriptor->StackParameterCount());
3120   Arm64OperandConverter g(this, nullptr);
3121 
3122   // {additional_pop_count} is only greater than zero if {parameter_count} == 0.
3123   // Check RawMachineAssembler::PopAndReturn.
3124   if (parameter_count != 0) {
3125     if (additional_pop_count->IsImmediate()) {
3126       DCHECK_EQ(g.ToConstant(additional_pop_count).ToInt32(), 0);
3127     } else if (__ emit_debug_code()) {
3128       __ cmp(g.ToRegister(additional_pop_count), Operand(0));
3129       __ Assert(eq, AbortReason::kUnexpectedAdditionalPopValue);
3130     }
3131   }
3132 
3133   Register argc_reg = x3;
3134 #ifdef V8_NO_ARGUMENTS_ADAPTOR
3135   // Functions with JS linkage have at least one parameter (the receiver).
3136   // If {parameter_count} == 0, it means it is a builtin with
3137   // kDontAdaptArgumentsSentinel, which takes care of JS arguments popping
3138   // itself.
3139   const bool drop_jsargs = frame_access_state()->has_frame() &&
3140                            call_descriptor->IsJSFunctionCall() &&
3141                            parameter_count != 0;
3142 #else
3143   const bool drop_jsargs = false;
3144 #endif
3145   if (call_descriptor->IsCFunctionCall()) {
3146     AssembleDeconstructFrame();
3147   } else if (frame_access_state()->has_frame()) {
3148     // Canonicalize JSFunction return sites for now unless they have a variable
3149     // number of stack slot pops.
3150     if (additional_pop_count->IsImmediate() &&
3151         g.ToConstant(additional_pop_count).ToInt32() == 0) {
3152       if (return_label_.is_bound()) {
3153         __ B(&return_label_);
3154         return;
3155       } else {
3156         __ Bind(&return_label_);
3157       }
3158     }
3159     if (drop_jsargs) {
3160       // Get the actual argument count.
3161       __ Ldr(argc_reg, MemOperand(fp, StandardFrameConstants::kArgCOffset));
3162     }
3163     AssembleDeconstructFrame();
3164   }
3165 
3166   if (drop_jsargs) {
3167     // We must pop all arguments from the stack (including the receiver). This
3168     // number of arguments is given by max(1 + argc_reg, parameter_count).
3169     Label argc_reg_has_final_count;
3170     __ Add(argc_reg, argc_reg, 1);  // Consider the receiver.
3171     if (parameter_count > 1) {
3172       __ Cmp(argc_reg, Operand(parameter_count));
3173       __ B(&argc_reg_has_final_count, ge);
3174       __ Mov(argc_reg, Operand(parameter_count));
3175       __ Bind(&argc_reg_has_final_count);
3176     }
3177     __ DropArguments(argc_reg);
3178   } else if (additional_pop_count->IsImmediate()) {
3179     int additional_count = g.ToConstant(additional_pop_count).ToInt32();
3180     __ DropArguments(parameter_count + additional_count);
3181   } else if (parameter_count == 0) {
3182     __ DropArguments(g.ToRegister(additional_pop_count));
3183   } else {
3184     // {additional_pop_count} is guaranteed to be zero if {parameter_count !=
3185     // 0}. Check RawMachineAssembler::PopAndReturn.
3186     __ DropArguments(parameter_count);
3187   }
3188   __ AssertSpAligned();
3189   __ Ret();
3190 }
3191 
3192 void CodeGenerator::FinishCode() { __ ForceConstantPoolEmissionWithoutJump(); }
3193 
3194 void CodeGenerator::PrepareForDeoptimizationExits(
3195     ZoneDeque<DeoptimizationExit*>* exits) {
3196   __ ForceConstantPoolEmissionWithoutJump();
3197   // We are conservative here, assuming all deopts are lazy deopts.
3198   DCHECK_GE(Deoptimizer::kLazyDeoptExitSize,
3199             Deoptimizer::kNonLazyDeoptExitSize);
3200   __ CheckVeneerPool(
3201       false, false,
3202       static_cast<int>(exits->size()) * Deoptimizer::kLazyDeoptExitSize);
3203 
3204   // Check which deopt kinds exist in this Code object, to avoid emitting jumps
3205   // to unused entries.
3206   bool saw_deopt_kind[kDeoptimizeKindCount] = {false};
3207   for (auto exit : *exits) {
3208     saw_deopt_kind[static_cast<int>(exit->kind())] = true;
3209   }
3210 
3211   // Emit the jumps to deoptimization entries.
3212   UseScratchRegisterScope scope(tasm());
3213   Register scratch = scope.AcquireX();
3214   STATIC_ASSERT(static_cast<int>(kFirstDeoptimizeKind) == 0);
3215   for (int i = 0; i < kDeoptimizeKindCount; i++) {
3216     if (!saw_deopt_kind[i]) continue;
3217     __ bind(&jump_deoptimization_entry_labels_[i]);
3218     __ LoadEntryFromBuiltinIndex(Deoptimizer::GetDeoptimizationEntry(
3219                                      isolate(), static_cast<DeoptimizeKind>(i)),
3220                                  scratch);
3221     __ Jump(scratch);
3222   }
3223 }
3224 
3225 void CodeGenerator::AssembleMove(InstructionOperand* source,
3226                                  InstructionOperand* destination) {
3227   Arm64OperandConverter g(this, nullptr);
3228   // Helper function to write the given constant to the dst register.
3229   auto MoveConstantToRegister = [&](Register dst, Constant src) {
3230     if (src.type() == Constant::kHeapObject) {
3231       Handle<HeapObject> src_object = src.ToHeapObject();
3232       RootIndex index;
3233       if (IsMaterializableFromRoot(src_object, &index)) {
3234         __ LoadRoot(dst, index);
3235       } else {
3236         __ Mov(dst, src_object);
3237       }
3238     } else if (src.type() == Constant::kCompressedHeapObject) {
3239       Handle<HeapObject> src_object = src.ToHeapObject();
3240       RootIndex index;
3241       if (IsMaterializableFromRoot(src_object, &index)) {
3242         __ LoadRoot(dst, index);
3243       } else {
3244         // TODO(v8:8977): Even though this mov happens on 32 bits (Note the
3245         // .W()) and we are passing along the RelocInfo, we still haven't made
3246         // the address embedded in the code-stream actually be compressed.
3247         __ Mov(dst.W(),
3248                Immediate(src_object, RelocInfo::COMPRESSED_EMBEDDED_OBJECT));
3249       }
3250     } else {
3251       __ Mov(dst, g.ToImmediate(source));
3252     }
3253   };
3254   switch (MoveType::InferMove(source, destination)) {
3255     case MoveType::kRegisterToRegister:
3256       if (source->IsRegister()) {
3257         __ Mov(g.ToRegister(destination), g.ToRegister(source));
3258       } else if (source->IsFloatRegister() || source->IsDoubleRegister()) {
3259         __ Mov(g.ToDoubleRegister(destination), g.ToDoubleRegister(source));
3260       } else {
3261         DCHECK(source->IsSimd128Register());
3262         __ Mov(g.ToDoubleRegister(destination).Q(),
3263                g.ToDoubleRegister(source).Q());
3264       }
3265       return;
3266     case MoveType::kRegisterToStack: {
3267       MemOperand dst = g.ToMemOperand(destination, tasm());
3268       if (source->IsRegister()) {
3269         __ Str(g.ToRegister(source), dst);
3270       } else {
3271         VRegister src = g.ToDoubleRegister(source);
3272         if (source->IsFloatRegister() || source->IsDoubleRegister()) {
3273           __ Str(src, dst);
3274         } else {
3275           DCHECK(source->IsSimd128Register());
3276           __ Str(src.Q(), dst);
3277         }
3278       }
3279       return;
3280     }
3281     case MoveType::kStackToRegister: {
3282       MemOperand src = g.ToMemOperand(source, tasm());
3283       if (destination->IsRegister()) {
3284         __ Ldr(g.ToRegister(destination), src);
3285       } else {
3286         VRegister dst = g.ToDoubleRegister(destination);
3287         if (destination->IsFloatRegister() || destination->IsDoubleRegister()) {
3288           __ Ldr(dst, src);
3289         } else {
3290           DCHECK(destination->IsSimd128Register());
3291           __ Ldr(dst.Q(), src);
3292         }
3293       }
3294       return;
3295     }
3296     case MoveType::kStackToStack: {
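           // There is no memory-to-memory move, so go through a scratch
           // register (Q for 128-bit slots, X otherwise).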
3297       MemOperand src = g.ToMemOperand(source, tasm());
3298       MemOperand dst = g.ToMemOperand(destination, tasm());
3299       if (source->IsSimd128StackSlot()) {
3300         UseScratchRegisterScope scope(tasm());
3301         VRegister temp = scope.AcquireQ();
3302         __ Ldr(temp, src);
3303         __ Str(temp, dst);
3304       } else {
3305         UseScratchRegisterScope scope(tasm());
3306         Register temp = scope.AcquireX();
3307         __ Ldr(temp, src);
3308         __ Str(temp, dst);
3309       }
3310       return;
3311     }
3312     case MoveType::kConstantToRegister: {
3313       Constant src = g.ToConstant(source);
3314       if (destination->IsRegister()) {
3315         MoveConstantToRegister(g.ToRegister(destination), src);
3316       } else {
3317         VRegister dst = g.ToDoubleRegister(destination);
3318         if (destination->IsFloatRegister()) {
3319           __ Fmov(dst.S(), src.ToFloat32());
3320         } else {
3321           DCHECK(destination->IsDoubleRegister());
3322           __ Fmov(dst, src.ToFloat64().value());
3323         }
3324       }
3325       return;
3326     }
3327     case MoveType::kConstantToStack: {
3328       Constant src = g.ToConstant(source);
3329       MemOperand dst = g.ToMemOperand(destination, tasm());
3330       if (destination->IsStackSlot()) {
3331         UseScratchRegisterScope scope(tasm());
3332         Register temp = scope.AcquireX();
3333         MoveConstantToRegister(temp, src);
3334         __ Str(temp, dst);
3335       } else if (destination->IsFloatStackSlot()) {
3336         if (bit_cast<int32_t>(src.ToFloat32()) == 0) {
3337           __ Str(wzr, dst);
3338         } else {
3339           UseScratchRegisterScope scope(tasm());
3340           VRegister temp = scope.AcquireS();
3341           __ Fmov(temp, src.ToFloat32());
3342           __ Str(temp, dst);
3343         }
3344       } else {
3345         DCHECK(destination->IsDoubleStackSlot());
3346         if (src.ToFloat64().AsUint64() == 0) {
3347           __ Str(xzr, dst);
3348         } else {
3349           UseScratchRegisterScope scope(tasm());
3350           VRegister temp = scope.AcquireD();
3351           __ Fmov(temp, src.ToFloat64().value());
3352           __ Str(temp, dst);
3353         }
3354       }
3355       return;
3356     }
3357   }
3358   UNREACHABLE();
3359 }
3360 
3361 void CodeGenerator::AssembleSwap(InstructionOperand* source,
3362                                  InstructionOperand* destination) {
3363   Arm64OperandConverter g(this, nullptr);
3364   switch (MoveType::InferSwap(source, destination)) {
3365     case MoveType::kRegisterToRegister:
3366       if (source->IsRegister()) {
3367         __ Swap(g.ToRegister(source), g.ToRegister(destination));
3368       } else {
3369         VRegister src = g.ToDoubleRegister(source);
3370         VRegister dst = g.ToDoubleRegister(destination);
3371         if (source->IsFloatRegister() || source->IsDoubleRegister()) {
3372           __ Swap(src, dst);
3373         } else {
3374           DCHECK(source->IsSimd128Register());
3375           __ Swap(src.Q(), dst.Q());
3376         }
3377       }
3378       return;
3379     case MoveType::kRegisterToStack: {
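           // Swap a register with a stack slot by saving the register into a
           // scratch register of matching width first.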
3380       UseScratchRegisterScope scope(tasm());
3381       MemOperand dst = g.ToMemOperand(destination, tasm());
3382       if (source->IsRegister()) {
3383         Register temp = scope.AcquireX();
3384         Register src = g.ToRegister(source);
3385         __ Mov(temp, src);
3386         __ Ldr(src, dst);
3387         __ Str(temp, dst);
3388       } else {
3389         UseScratchRegisterScope scope(tasm());
3390         VRegister src = g.ToDoubleRegister(source);
3391         if (source->IsFloatRegister() || source->IsDoubleRegister()) {
3392           VRegister temp = scope.AcquireD();
3393           __ Mov(temp, src);
3394           __ Ldr(src, dst);
3395           __ Str(temp, dst);
3396         } else {
3397           DCHECK(source->IsSimd128Register());
3398           VRegister temp = scope.AcquireQ();
3399           __ Mov(temp, src.Q());
3400           __ Ldr(src.Q(), dst);
3401           __ Str(temp, dst);
3402         }
3403       }
3404       return;
3405     }
3406     case MoveType::kStackToStack: {
3407       UseScratchRegisterScope scope(tasm());
3408       MemOperand src = g.ToMemOperand(source, tasm());
3409       MemOperand dst = g.ToMemOperand(destination, tasm());
3410       VRegister temp_0 = scope.AcquireD();
3411       VRegister temp_1 = scope.AcquireD();
3412       if (source->IsSimd128StackSlot()) {
3413         __ Ldr(temp_0.Q(), src);
3414         __ Ldr(temp_1.Q(), dst);
3415         __ Str(temp_0.Q(), dst);
3416         __ Str(temp_1.Q(), src);
3417       } else {
3418         __ Ldr(temp_0, src);
3419         __ Ldr(temp_1, dst);
3420         __ Str(temp_0, dst);
3421         __ Str(temp_1, src);
3422       }
3423       return;
3424     }
3425     default:
3426       UNREACHABLE();
3427   }
3428 }
3429 
3430 void CodeGenerator::AssembleJumpTable(Label** targets, size_t target_count) {
3431   // On 64-bit ARM we emit the jump tables inline.
3432   UNREACHABLE();
3433 }
3434 
3435 #undef __
3436 
3437 }  // namespace compiler
3438 }  // namespace internal
3439 }  // namespace v8
3440