// Copyright 2014 the V8 project authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#include "src/codegen/arm64/assembler-arm64-inl.h"
#include "src/codegen/arm64/macro-assembler-arm64-inl.h"
#include "src/codegen/optimized-compilation-info.h"
#include "src/compiler/backend/code-generator-impl.h"
#include "src/compiler/backend/code-generator.h"
#include "src/compiler/backend/gap-resolver.h"
#include "src/compiler/node-matchers.h"
#include "src/compiler/osr.h"
#include "src/execution/frame-constants.h"
#include "src/heap/memory-chunk.h"
#include "src/wasm/wasm-code-manager.h"
#include "src/wasm/wasm-objects.h"

namespace v8 {
namespace internal {
namespace compiler {

#define __ tasm()->

// Adds Arm64-specific methods to convert InstructionOperands.
class Arm64OperandConverter final : public InstructionOperandConverter {
 public:
  Arm64OperandConverter(CodeGenerator* gen, Instruction* instr)
      : InstructionOperandConverter(gen, instr) {}

  DoubleRegister InputFloat32Register(size_t index) {
    return InputDoubleRegister(index).S();
  }

  DoubleRegister InputFloat64Register(size_t index) {
    return InputDoubleRegister(index);
  }

  DoubleRegister InputSimd128Register(size_t index) {
    return InputDoubleRegister(index).Q();
  }

  CPURegister InputFloat32OrZeroRegister(size_t index) {
    if (instr_->InputAt(index)->IsImmediate()) {
      DCHECK_EQ(0, bit_cast<int32_t>(InputFloat32(index)));
      return wzr;
    }
    DCHECK(instr_->InputAt(index)->IsFPRegister());
    return InputDoubleRegister(index).S();
  }

  CPURegister InputFloat64OrZeroRegister(size_t index) {
    if (instr_->InputAt(index)->IsImmediate()) {
      DCHECK_EQ(0, bit_cast<int64_t>(InputDouble(index)));
      return xzr;
    }
    DCHECK(instr_->InputAt(index)->IsDoubleRegister());
    return InputDoubleRegister(index);
  }

  size_t OutputCount() { return instr_->OutputCount(); }

  DoubleRegister OutputFloat32Register() { return OutputDoubleRegister().S(); }

  DoubleRegister OutputFloat64Register() { return OutputDoubleRegister(); }

  DoubleRegister OutputSimd128Register() { return OutputDoubleRegister().Q(); }

  Register InputRegister32(size_t index) {
    return ToRegister(instr_->InputAt(index)).W();
  }

  Register InputOrZeroRegister32(size_t index) {
    DCHECK(instr_->InputAt(index)->IsRegister() ||
           (instr_->InputAt(index)->IsImmediate() && (InputInt32(index) == 0)));
    if (instr_->InputAt(index)->IsImmediate()) {
      return wzr;
    }
    return InputRegister32(index);
  }

  Register InputRegister64(size_t index) { return InputRegister(index); }

  Register InputOrZeroRegister64(size_t index) {
    DCHECK(instr_->InputAt(index)->IsRegister() ||
           (instr_->InputAt(index)->IsImmediate() && (InputInt64(index) == 0)));
    if (instr_->InputAt(index)->IsImmediate()) {
      return xzr;
    }
    return InputRegister64(index);
  }

  Operand InputOperand(size_t index) {
    return ToOperand(instr_->InputAt(index));
  }

  Operand InputOperand64(size_t index) { return InputOperand(index); }

  Operand InputOperand32(size_t index) {
    return ToOperand32(instr_->InputAt(index));
  }

  Register OutputRegister64() { return OutputRegister(); }

  Register OutputRegister32() { return ToRegister(instr_->Output()).W(); }

  Register TempRegister32(size_t index) {
    return ToRegister(instr_->TempAt(index)).W();
  }

  Operand InputOperand2_32(size_t index) {
    switch (AddressingModeField::decode(instr_->opcode())) {
      case kMode_None:
        return InputOperand32(index);
      case kMode_Operand2_R_LSL_I:
        return Operand(InputRegister32(index), LSL, InputInt5(index + 1));
      case kMode_Operand2_R_LSR_I:
        return Operand(InputRegister32(index), LSR, InputInt5(index + 1));
      case kMode_Operand2_R_ASR_I:
        return Operand(InputRegister32(index), ASR, InputInt5(index + 1));
      case kMode_Operand2_R_ROR_I:
        return Operand(InputRegister32(index), ROR, InputInt5(index + 1));
      case kMode_Operand2_R_UXTB:
        return Operand(InputRegister32(index), UXTB);
      case kMode_Operand2_R_UXTH:
        return Operand(InputRegister32(index), UXTH);
      case kMode_Operand2_R_SXTB:
        return Operand(InputRegister32(index), SXTB);
      case kMode_Operand2_R_SXTH:
        return Operand(InputRegister32(index), SXTH);
      case kMode_Operand2_R_SXTW:
        return Operand(InputRegister32(index), SXTW);
      case kMode_MRI:
      case kMode_MRR:
      case kMode_Root:
        break;
    }
    UNREACHABLE();
  }

  Operand InputOperand2_64(size_t index) {
    switch (AddressingModeField::decode(instr_->opcode())) {
      case kMode_None:
        return InputOperand64(index);
      case kMode_Operand2_R_LSL_I:
        return Operand(InputRegister64(index), LSL, InputInt6(index + 1));
      case kMode_Operand2_R_LSR_I:
        return Operand(InputRegister64(index), LSR, InputInt6(index + 1));
      case kMode_Operand2_R_ASR_I:
        return Operand(InputRegister64(index), ASR, InputInt6(index + 1));
      case kMode_Operand2_R_ROR_I:
        return Operand(InputRegister64(index), ROR, InputInt6(index + 1));
      case kMode_Operand2_R_UXTB:
        return Operand(InputRegister64(index), UXTB);
      case kMode_Operand2_R_UXTH:
        return Operand(InputRegister64(index), UXTH);
      case kMode_Operand2_R_SXTB:
        return Operand(InputRegister64(index), SXTB);
      case kMode_Operand2_R_SXTH:
        return Operand(InputRegister64(index), SXTH);
      case kMode_Operand2_R_SXTW:
        return Operand(InputRegister64(index), SXTW);
      case kMode_MRI:
      case kMode_MRR:
      case kMode_Root:
        break;
    }
    UNREACHABLE();
  }

  MemOperand MemoryOperand(size_t index = 0) {
    switch (AddressingModeField::decode(instr_->opcode())) {
      case kMode_None:
      case kMode_Operand2_R_LSR_I:
      case kMode_Operand2_R_ASR_I:
      case kMode_Operand2_R_ROR_I:
      case kMode_Operand2_R_UXTB:
      case kMode_Operand2_R_UXTH:
      case kMode_Operand2_R_SXTB:
      case kMode_Operand2_R_SXTH:
      case kMode_Operand2_R_SXTW:
        break;
      case kMode_Root:
        return MemOperand(kRootRegister, InputInt64(index));
      case kMode_Operand2_R_LSL_I:
        return MemOperand(InputRegister(index + 0), InputRegister(index + 1),
                          LSL, InputInt32(index + 2));
      case kMode_MRI:
        return MemOperand(InputRegister(index + 0), InputInt32(index + 1));
      case kMode_MRR:
        return MemOperand(InputRegister(index + 0), InputRegister(index + 1));
    }
    UNREACHABLE();
  }

  Operand ToOperand(InstructionOperand* op) {
    if (op->IsRegister()) {
      return Operand(ToRegister(op));
    }
    return ToImmediate(op);
  }

  Operand ToOperand32(InstructionOperand* op) {
    if (op->IsRegister()) {
      return Operand(ToRegister(op).W());
    }
    return ToImmediate(op);
  }

  Operand ToImmediate(InstructionOperand* operand) {
    Constant constant = ToConstant(operand);
    switch (constant.type()) {
      case Constant::kInt32:
        return Operand(constant.ToInt32());
      case Constant::kInt64:
        if (RelocInfo::IsWasmReference(constant.rmode())) {
          return Operand(constant.ToInt64(), constant.rmode());
        } else {
          return Operand(constant.ToInt64());
        }
      case Constant::kFloat32:
        return Operand(Operand::EmbeddedNumber(constant.ToFloat32()));
      case Constant::kFloat64:
        return Operand(Operand::EmbeddedNumber(constant.ToFloat64().value()));
      case Constant::kExternalReference:
        return Operand(constant.ToExternalReference());
      case Constant::kCompressedHeapObject:  // Fall through.
      case Constant::kHeapObject:
        return Operand(constant.ToHeapObject());
      case Constant::kDelayedStringConstant:
        return Operand::EmbeddedStringConstant(
            constant.ToDelayedStringConstant());
      case Constant::kRpoNumber:
        UNREACHABLE();  // TODO(dcarney): RPO immediates on arm64.
        break;
    }
    UNREACHABLE();
  }

  MemOperand ToMemOperand(InstructionOperand* op, TurboAssembler* tasm) const {
    DCHECK_NOT_NULL(op);
    DCHECK(op->IsStackSlot() || op->IsFPStackSlot());
    return SlotToMemOperand(AllocatedOperand::cast(op)->index(), tasm);
  }

  MemOperand SlotToMemOperand(int slot, TurboAssembler* tasm) const {
    FrameOffset offset = frame_access_state()->GetFrameOffset(slot);
    if (offset.from_frame_pointer()) {
      int from_sp = offset.offset() + frame_access_state()->GetSPToFPOffset();
      // Convert FP-offsets to SP-offsets if it results in better code.
      if (Assembler::IsImmLSUnscaled(from_sp) ||
          Assembler::IsImmLSScaled(from_sp, 3)) {
        offset = FrameOffset::FromStackPointer(from_sp);
      }
    }
    return MemOperand(offset.from_stack_pointer() ? sp : fp, offset.offset());
  }
};

namespace {

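// Out-of-line code that calls the record-write stub when a store needs a
// write barrier.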
class OutOfLineRecordWrite final : public OutOfLineCode {
 public:
  OutOfLineRecordWrite(CodeGenerator* gen, Register object, Operand offset,
                       Register value, RecordWriteMode mode,
                       StubCallMode stub_mode,
                       UnwindingInfoWriter* unwinding_info_writer)
      : OutOfLineCode(gen),
        object_(object),
        offset_(offset),
        value_(value),
        mode_(mode),
        stub_mode_(stub_mode),
        must_save_lr_(!gen->frame_access_state()->has_frame()),
        unwinding_info_writer_(unwinding_info_writer),
        zone_(gen->zone()) {}

  void Generate() final {
    if (mode_ > RecordWriteMode::kValueIsPointer) {
      __ JumpIfSmi(value_, exit());
    }
    if (COMPRESS_POINTERS_BOOL) {
      __ DecompressTaggedPointer(value_, value_);
    }
    __ CheckPageFlag(value_, MemoryChunk::kPointersToHereAreInterestingMask, ne,
                     exit());
    RememberedSetAction const remembered_set_action =
        mode_ > RecordWriteMode::kValueIsMap ? EMIT_REMEMBERED_SET
                                             : OMIT_REMEMBERED_SET;
    SaveFPRegsMode const save_fp_mode =
        frame()->DidAllocateDoubleRegisters() ? kSaveFPRegs : kDontSaveFPRegs;
    if (must_save_lr_) {
      // We need to save and restore lr if the frame was elided.
      __ Push<TurboAssembler::kSignLR>(lr, padreg);
      unwinding_info_writer_->MarkLinkRegisterOnTopOfStack(__ pc_offset(), sp);
    }
    if (mode_ == RecordWriteMode::kValueIsEphemeronKey) {
      __ CallEphemeronKeyBarrier(object_, offset_, save_fp_mode);
    } else if (stub_mode_ == StubCallMode::kCallWasmRuntimeStub) {
      // A direct call to a wasm runtime stub defined in this module.
      // Just encode the stub index. This will be patched when the code
      // is added to the native module and copied into wasm code space.
      __ CallRecordWriteStub(object_, offset_, remembered_set_action,
                             save_fp_mode, wasm::WasmCode::kRecordWrite);
    } else {
      __ CallRecordWriteStub(object_, offset_, remembered_set_action,
                             save_fp_mode);
    }
    if (must_save_lr_) {
      __ Pop<TurboAssembler::kAuthLR>(padreg, lr);
      unwinding_info_writer_->MarkPopLinkRegisterFromTopOfStack(__ pc_offset());
    }
  }

 private:
  Register const object_;
  Operand const offset_;
  Register const value_;
  RecordWriteMode const mode_;
  StubCallMode const stub_mode_;
  bool must_save_lr_;
  UnwindingInfoWriter* const unwinding_info_writer_;
  Zone* zone_;
};

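// Maps an architecture-independent FlagsCondition to the equivalent arm64
// condition code.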
Condition FlagsConditionToCondition(FlagsCondition condition) {
  switch (condition) {
    case kEqual:
      return eq;
    case kNotEqual:
      return ne;
    case kSignedLessThan:
      return lt;
    case kSignedGreaterThanOrEqual:
      return ge;
    case kSignedLessThanOrEqual:
      return le;
    case kSignedGreaterThan:
      return gt;
    case kUnsignedLessThan:
      return lo;
    case kUnsignedGreaterThanOrEqual:
      return hs;
    case kUnsignedLessThanOrEqual:
      return ls;
    case kUnsignedGreaterThan:
      return hi;
    case kFloatLessThanOrUnordered:
      return lt;
    case kFloatGreaterThanOrEqual:
      return ge;
    case kFloatLessThanOrEqual:
      return ls;
    case kFloatGreaterThanOrUnordered:
      return hi;
    case kFloatLessThan:
      return lo;
    case kFloatGreaterThanOrEqualOrUnordered:
      return hs;
    case kFloatLessThanOrEqualOrUnordered:
      return le;
    case kFloatGreaterThan:
      return gt;
    case kOverflow:
      return vs;
    case kNotOverflow:
      return vc;
    case kUnorderedEqual:
    case kUnorderedNotEqual:
      break;
    case kPositiveOrZero:
      return pl;
    case kNegative:
      return mi;
  }
  UNREACHABLE();
}

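// Masks a just-loaded value with the speculation poison register when the
// access is poisoned, so a mis-speculated load cannot be used downstream.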
void EmitWordLoadPoisoningIfNeeded(CodeGenerator* codegen,
                                   InstructionCode opcode, Instruction* instr,
                                   Arm64OperandConverter const& i) {
  const MemoryAccessMode access_mode =
      static_cast<MemoryAccessMode>(MiscField::decode(opcode));
  if (access_mode == kMemoryAccessPoisoned) {
    Register value = i.OutputRegister();
    Register poison = value.Is64Bits() ? kSpeculationPoisonRegister
                                       : kSpeculationPoisonRegister.W();
    codegen->tasm()->And(value, value, Operand(poison));
  }
}

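// Like EmitWordLoadPoisoningIfNeeded, but for FP loads: the address, rather
// than the loaded value, is masked with the speculation poison register.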
void EmitMaybePoisonedFPLoad(CodeGenerator* codegen, InstructionCode opcode,
                             Arm64OperandConverter* i, VRegister output_reg) {
  const MemoryAccessMode access_mode =
      static_cast<MemoryAccessMode>(MiscField::decode(opcode));
  AddressingMode address_mode = AddressingModeField::decode(opcode);
  if (access_mode == kMemoryAccessPoisoned && address_mode != kMode_Root) {
    UseScratchRegisterScope temps(codegen->tasm());
    Register address = temps.AcquireX();
    switch (address_mode) {
      case kMode_MRI:  // Fall through.
      case kMode_MRR:
        codegen->tasm()->Add(address, i->InputRegister(0), i->InputOperand(1));
        break;
      case kMode_Operand2_R_LSL_I:
        codegen->tasm()->Add(address, i->InputRegister(0),
                             i->InputOperand2_64(1));
        break;
      default:
        // Note: we don't need poisoning for kMode_Root loads as those loads
        // target a fixed offset from root register which is set once when
        // initializing the vm.
        UNREACHABLE();
    }
    codegen->tasm()->And(address, address, Operand(kSpeculationPoisonRegister));
    codegen->tasm()->Ldr(output_reg, MemOperand(address));
  } else {
    codegen->tasm()->Ldr(output_reg, i->MemoryOperand());
  }
}

// Handles unary ops that work for float (scalar), double (scalar), or NEON.
template <typename Fn>
void EmitFpOrNeonUnop(TurboAssembler* tasm, Fn fn, Instruction* instr,
                      Arm64OperandConverter i, VectorFormat scalar,
                      VectorFormat vector) {
  VectorFormat f = instr->InputAt(0)->IsSimd128Register() ? vector : scalar;

  VRegister output = VRegister::Create(i.OutputDoubleRegister().code(), f);
  VRegister input = VRegister::Create(i.InputDoubleRegister(0).code(), f);
  (tasm->*fn)(output, input);
}

}  // namespace

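// Emits a shift by either a register or an immediate amount; immediate shift
// amounts are reduced modulo the operand width.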
#define ASSEMBLE_SHIFT(asm_instr, width)                                    \
  do {                                                                      \
    if (instr->InputAt(1)->IsRegister()) {                                  \
      __ asm_instr(i.OutputRegister##width(), i.InputRegister##width(0),    \
                   i.InputRegister##width(1));                              \
    } else {                                                                \
      uint32_t imm =                                                        \
          static_cast<uint32_t>(i.InputOperand##width(1).ImmediateValue()); \
      __ asm_instr(i.OutputRegister##width(), i.InputRegister##width(0),    \
                   imm % (width));                                          \
    }                                                                       \
  } while (0)

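// Emits an atomic load from the address formed by adding the base (input 0)
// and index (input 1).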
#define ASSEMBLE_ATOMIC_LOAD_INTEGER(asm_instr, reg)                   \
  do {                                                                 \
    __ Add(i.TempRegister(0), i.InputRegister(0), i.InputRegister(1)); \
    __ asm_instr(i.Output##reg(), i.TempRegister(0));                  \
  } while (0)

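// Emits an atomic store of input 2 to the address formed by adding the base
// (input 0) and index (input 1).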
#define ASSEMBLE_ATOMIC_STORE_INTEGER(asm_instr, reg)                  \
  do {                                                                 \
    __ Add(i.TempRegister(0), i.InputRegister(0), i.InputRegister(1)); \
    __ asm_instr(i.Input##reg(2), i.TempRegister(0));                  \
  } while (0)

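// Emits a load-exclusive/store-exclusive loop that atomically exchanges the
// value at the address with input 2, retrying until the store succeeds.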
#define ASSEMBLE_ATOMIC_EXCHANGE_INTEGER(load_instr, store_instr, reg)       \
  do {                                                                       \
    Label exchange;                                                          \
    __ Add(i.TempRegister(0), i.InputRegister(0), i.InputRegister(1));       \
    __ Bind(&exchange);                                                      \
    __ load_instr(i.Output##reg(), i.TempRegister(0));                       \
    __ store_instr(i.TempRegister32(1), i.Input##reg(2), i.TempRegister(0)); \
    __ Cbnz(i.TempRegister32(1), &exchange);                                 \
  } while (0)

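// Emits a compare-and-swap loop: the value at the address is replaced with
// input 3 only if it equals the (extended) expected value from input 2.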
#define ASSEMBLE_ATOMIC_COMPARE_EXCHANGE_INTEGER(load_instr, store_instr, ext, \
                                                 reg)                          \
  do {                                                                         \
    Label compareExchange;                                                     \
    Label exit;                                                                \
    __ Add(i.TempRegister(0), i.InputRegister(0), i.InputRegister(1));         \
    __ Bind(&compareExchange);                                                 \
    __ load_instr(i.Output##reg(), i.TempRegister(0));                         \
    __ Cmp(i.Output##reg(), Operand(i.Input##reg(2), ext));                    \
    __ B(ne, &exit);                                                           \
    __ store_instr(i.TempRegister32(1), i.Input##reg(3), i.TempRegister(0));   \
    __ Cbnz(i.TempRegister32(1), &compareExchange);                            \
    __ Bind(&exit);                                                            \
  } while (0)

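// Emits a read-modify-write loop that atomically combines the value at the
// address with input 2 using bin_instr, retrying until the store-exclusive
// succeeds.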
#define ASSEMBLE_ATOMIC_BINOP(load_instr, store_instr, bin_instr, reg)       \
  do {                                                                       \
    Label binop;                                                             \
    __ Add(i.TempRegister(0), i.InputRegister(0), i.InputRegister(1));       \
    __ Bind(&binop);                                                         \
    __ load_instr(i.Output##reg(), i.TempRegister(0));                       \
    __ bin_instr(i.Temp##reg(1), i.Output##reg(), Operand(i.Input##reg(2))); \
    __ store_instr(i.TempRegister32(2), i.Temp##reg(1), i.TempRegister(0));  \
    __ Cbnz(i.TempRegister32(2), &binop);                                    \
  } while (0)

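// Calls the C implementation of the given IEEE 754 binary operation, passing
// two double arguments in FP registers.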
#define ASSEMBLE_IEEE754_BINOP(name)                                        \
  do {                                                                      \
    FrameScope scope(tasm(), StackFrame::MANUAL);                           \
    __ CallCFunction(ExternalReference::ieee754_##name##_function(), 0, 2); \
  } while (0)

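// Same as above, but for unary operations taking a single double argument.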
#define ASSEMBLE_IEEE754_UNOP(name)                                         \
  do {                                                                      \
    FrameScope scope(tasm(), StackFrame::MANUAL);                           \
    __ CallCFunction(ExternalReference::ieee754_##name##_function(), 0, 1); \
  } while (0)

// If shift value is an immediate, we can call asm_imm, taking the shift value
// modulo 2^width. Otherwise, emit code to perform the modulus operation, and
// call asm_shl.
#define ASSEMBLE_SIMD_SHIFT_LEFT(asm_imm, width, format, asm_shl, gp)       \
  do {                                                                      \
    if (instr->InputAt(1)->IsImmediate()) {                                 \
      __ asm_imm(i.OutputSimd128Register().format(),                        \
                 i.InputSimd128Register(0).format(), i.InputInt##width(1)); \
    } else {                                                                \
      UseScratchRegisterScope temps(tasm());                                \
      VRegister tmp = temps.AcquireQ();                                     \
      Register shift = temps.Acquire##gp();                                 \
      constexpr int mask = (1 << width) - 1;                                \
      __ And(shift, i.InputRegister32(1), mask);                            \
      __ Dup(tmp.format(), shift);                                          \
      __ asm_shl(i.OutputSimd128Register().format(),                        \
                 i.InputSimd128Register(0).format(), tmp.format());         \
    }                                                                       \
  } while (0)

// If shift value is an immediate, we can call asm_imm, taking the shift value
// modulo 2^width. Otherwise, emit code to perform the modulus operation, and
// call asm_shl, passing in the negative shift value (treated as right shift).
#define ASSEMBLE_SIMD_SHIFT_RIGHT(asm_imm, width, format, asm_shl, gp)      \
  do {                                                                      \
    if (instr->InputAt(1)->IsImmediate()) {                                 \
      __ asm_imm(i.OutputSimd128Register().format(),                        \
                 i.InputSimd128Register(0).format(), i.InputInt##width(1)); \
    } else {                                                                \
      UseScratchRegisterScope temps(tasm());                                \
      VRegister tmp = temps.AcquireQ();                                     \
      Register shift = temps.Acquire##gp();                                 \
      constexpr int mask = (1 << width) - 1;                                \
      __ And(shift, i.InputRegister32(1), mask);                            \
      __ Dup(tmp.format(), shift);                                          \
      __ Neg(tmp.format(), tmp.format());                                   \
      __ asm_shl(i.OutputSimd128Register().format(),                        \
                 i.InputSimd128Register(0).format(), tmp.format());         \
    }                                                                       \
  } while (0)

void CodeGenerator::AssembleDeconstructFrame() {
  __ Mov(sp, fp);
  __ Pop<TurboAssembler::kAuthLR>(fp, lr);

  unwinding_info_writer_.MarkFrameDeconstructed(__ pc_offset());
}

void CodeGenerator::AssemblePrepareTailCall() {
  if (frame_access_state()->has_frame()) {
    __ RestoreFPAndLR();
  }
  frame_access_state()->SetFrameAccessToSP();
}

void CodeGenerator::AssemblePopArgumentsAdaptorFrame(Register args_reg,
                                                     Register scratch1,
                                                     Register scratch2,
                                                     Register scratch3) {
  DCHECK(!AreAliased(args_reg, scratch1, scratch2, scratch3));
  Label done;

  // Check if current frame is an arguments adaptor frame.
  __ Ldr(scratch1, MemOperand(fp, StandardFrameConstants::kContextOffset));
  __ Cmp(scratch1,
         Operand(StackFrame::TypeToMarker(StackFrame::ARGUMENTS_ADAPTOR)));
  __ B(ne, &done);

  // Load arguments count from current arguments adaptor frame (note, it
  // does not include receiver).
  Register caller_args_count_reg = scratch1;
  __ Ldr(caller_args_count_reg,
         MemOperand(fp, ArgumentsAdaptorFrameConstants::kLengthOffset));
  __ SmiUntag(caller_args_count_reg);

  __ PrepareForTailCall(args_reg, caller_args_count_reg, scratch2, scratch3);
  __ bind(&done);
}

namespace {

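// Claims or drops stack slots so that the stack pointer matches the slot
// layout expected at the tail-call site.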
void AdjustStackPointerForTailCall(TurboAssembler* tasm,
                                   FrameAccessState* state,
                                   int new_slot_above_sp,
                                   bool allow_shrinkage = true) {
  int current_sp_offset = state->GetSPToFPSlotCount() +
                          StandardFrameConstants::kFixedSlotCountAboveFp;
  int stack_slot_delta = new_slot_above_sp - current_sp_offset;
  DCHECK_EQ(stack_slot_delta % 2, 0);
  if (stack_slot_delta > 0) {
    tasm->Claim(stack_slot_delta);
    state->IncreaseSPDelta(stack_slot_delta);
  } else if (allow_shrinkage && stack_slot_delta < 0) {
    tasm->Drop(-stack_slot_delta);
    state->IncreaseSPDelta(stack_slot_delta);
  }
}

}  // namespace

void CodeGenerator::AssembleTailCallBeforeGap(Instruction* instr,
                                              int first_unused_stack_slot) {
  AdjustStackPointerForTailCall(tasm(), frame_access_state(),
                                first_unused_stack_slot, false);
}

void CodeGenerator::AssembleTailCallAfterGap(Instruction* instr,
                                             int first_unused_stack_slot) {
  DCHECK_EQ(first_unused_stack_slot % 2, 0);
  AdjustStackPointerForTailCall(tasm(), frame_access_state(),
                                first_unused_stack_slot);
  DCHECK(instr->IsTailCall());
  InstructionOperandConverter g(this, instr);
  int optional_padding_slot = g.InputInt32(instr->InputCount() - 2);
  if (optional_padding_slot % 2) {
    __ Poke(padreg, optional_padding_slot * kSystemPointerSize);
  }
}

// Check that {kJavaScriptCallCodeStartRegister} is correct.
void CodeGenerator::AssembleCodeStartRegisterCheck() {
  UseScratchRegisterScope temps(tasm());
  Register scratch = temps.AcquireX();
  __ ComputeCodeStartAddress(scratch);
  __ cmp(scratch, kJavaScriptCallCodeStartRegister);
  __ Assert(eq, AbortReason::kWrongFunctionCodeStart);
}

// Check if the code object is marked for deoptimization. If it is, then it
// jumps to the CompileLazyDeoptimizedCode builtin. In order to do this we need
// to:
// 1. read from memory the word that contains that bit, which can be found in
//    the flags in the referenced {CodeDataContainer} object;
// 2. test kMarkedForDeoptimizationBit in those flags; and
// 3. if it is not zero then it jumps to the builtin.
void CodeGenerator::BailoutIfDeoptimized() {
  UseScratchRegisterScope temps(tasm());
  Register scratch = temps.AcquireX();
  int offset = Code::kCodeDataContainerOffset - Code::kHeaderSize;
  __ LoadTaggedPointerField(
      scratch, MemOperand(kJavaScriptCallCodeStartRegister, offset));
  __ Ldr(scratch.W(),
         FieldMemOperand(scratch, CodeDataContainer::kKindSpecificFlagsOffset));
  Label not_deoptimized;
  __ Tbz(scratch.W(), Code::kMarkedForDeoptimizationBit, &not_deoptimized);
  __ Jump(BUILTIN_CODE(isolate(), CompileLazyDeoptimizedCode),
          RelocInfo::CODE_TARGET);
  __ Bind(&not_deoptimized);
}

void CodeGenerator::GenerateSpeculationPoisonFromCodeStartRegister() {
  UseScratchRegisterScope temps(tasm());
  Register scratch = temps.AcquireX();

  // Set a mask which has all bits set in the normal case, but has all
  // bits cleared if we are speculatively executing the wrong PC.
  __ ComputeCodeStartAddress(scratch);
  __ Cmp(kJavaScriptCallCodeStartRegister, scratch);
  __ Csetm(kSpeculationPoisonRegister, eq);
  __ Csdb();
}

void CodeGenerator::AssembleRegisterArgumentPoisoning() {
  UseScratchRegisterScope temps(tasm());
  Register scratch = temps.AcquireX();

  __ Mov(scratch, sp);
  __ And(kJSFunctionRegister, kJSFunctionRegister, kSpeculationPoisonRegister);
  __ And(kContextRegister, kContextRegister, kSpeculationPoisonRegister);
  __ And(scratch, scratch, kSpeculationPoisonRegister);
  __ Mov(sp, scratch);
}

// Assembles an instruction after register allocation, producing machine code.
CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
    Instruction* instr) {
  Arm64OperandConverter i(this, instr);
  InstructionCode opcode = instr->opcode();
  ArchOpcode arch_opcode = ArchOpcodeField::decode(opcode);
  switch (arch_opcode) {
    case kArchCallCodeObject: {
      if (instr->InputAt(0)->IsImmediate()) {
        __ Call(i.InputCode(0), RelocInfo::CODE_TARGET);
      } else {
        Register reg = i.InputRegister(0);
        DCHECK_IMPLIES(
            instr->HasCallDescriptorFlag(CallDescriptor::kFixedTargetRegister),
            reg == kJavaScriptCallCodeStartRegister);
        __ CallCodeObject(reg);
      }
      RecordCallPosition(instr);
      frame_access_state()->ClearSPDelta();
      break;
    }
    case kArchCallBuiltinPointer: {
      DCHECK(!instr->InputAt(0)->IsImmediate());
      Register builtin_index = i.InputRegister(0);
      __ CallBuiltinByIndex(builtin_index);
      RecordCallPosition(instr);
      frame_access_state()->ClearSPDelta();
      break;
    }
    case kArchCallWasmFunction: {
      if (instr->InputAt(0)->IsImmediate()) {
        Constant constant = i.ToConstant(instr->InputAt(0));
        Address wasm_code = static_cast<Address>(constant.ToInt64());
        __ Call(wasm_code, constant.rmode());
      } else {
        Register target = i.InputRegister(0);
        __ Call(target);
      }
      RecordCallPosition(instr);
      frame_access_state()->ClearSPDelta();
      break;
    }
    case kArchTailCallCodeObjectFromJSFunction:
    case kArchTailCallCodeObject: {
      if (arch_opcode == kArchTailCallCodeObjectFromJSFunction) {
        AssemblePopArgumentsAdaptorFrame(kJavaScriptCallArgCountRegister,
                                         i.TempRegister(0), i.TempRegister(1),
                                         i.TempRegister(2));
      }
      if (instr->InputAt(0)->IsImmediate()) {
        __ Jump(i.InputCode(0), RelocInfo::CODE_TARGET);
      } else {
        Register reg = i.InputRegister(0);
        DCHECK_IMPLIES(
            instr->HasCallDescriptorFlag(CallDescriptor::kFixedTargetRegister),
            reg == kJavaScriptCallCodeStartRegister);
        __ JumpCodeObject(reg);
      }
      unwinding_info_writer_.MarkBlockWillExit();
      frame_access_state()->ClearSPDelta();
      frame_access_state()->SetFrameAccessToDefault();
      break;
    }
    case kArchTailCallWasm: {
      if (instr->InputAt(0)->IsImmediate()) {
        Constant constant = i.ToConstant(instr->InputAt(0));
        Address wasm_code = static_cast<Address>(constant.ToInt64());
        __ Jump(wasm_code, constant.rmode());
      } else {
        Register target = i.InputRegister(0);
        UseScratchRegisterScope temps(tasm());
        temps.Exclude(x17);
        __ Mov(x17, target);
        __ Jump(x17);
      }
      unwinding_info_writer_.MarkBlockWillExit();
      frame_access_state()->ClearSPDelta();
      frame_access_state()->SetFrameAccessToDefault();
      break;
    }
    case kArchTailCallAddress: {
      CHECK(!instr->InputAt(0)->IsImmediate());
      Register reg = i.InputRegister(0);
      DCHECK_IMPLIES(
          instr->HasCallDescriptorFlag(CallDescriptor::kFixedTargetRegister),
          reg == kJavaScriptCallCodeStartRegister);
      UseScratchRegisterScope temps(tasm());
      temps.Exclude(x17);
      __ Mov(x17, reg);
      __ Jump(x17);
      unwinding_info_writer_.MarkBlockWillExit();
      frame_access_state()->ClearSPDelta();
      frame_access_state()->SetFrameAccessToDefault();
      break;
    }
    case kArchCallJSFunction: {
      Register func = i.InputRegister(0);
      if (FLAG_debug_code) {
        // Check the function's context matches the context argument.
        UseScratchRegisterScope scope(tasm());
        Register temp = scope.AcquireX();
        __ LoadTaggedPointerField(
            temp, FieldMemOperand(func, JSFunction::kContextOffset));
        __ cmp(cp, temp);
        __ Assert(eq, AbortReason::kWrongFunctionContext);
      }
      static_assert(kJavaScriptCallCodeStartRegister == x2, "ABI mismatch");
      __ LoadTaggedPointerField(x2,
                                FieldMemOperand(func, JSFunction::kCodeOffset));
      __ CallCodeObject(x2);
      RecordCallPosition(instr);
      frame_access_state()->ClearSPDelta();
      break;
    }
    case kArchPrepareCallCFunction:
      // We don't need kArchPrepareCallCFunction on arm64 as the instruction
      // selector has already performed a Claim to reserve space on the stack.
      // Frame alignment is always 16 bytes, and the stack pointer is already
      // 16-byte aligned, therefore we do not need to align the stack pointer
      // by an unknown value, and it is safe to continue accessing the frame
      // via the stack pointer.
      UNREACHABLE();
    case kArchSaveCallerRegisters: {
      fp_mode_ =
          static_cast<SaveFPRegsMode>(MiscField::decode(instr->opcode()));
      DCHECK(fp_mode_ == kDontSaveFPRegs || fp_mode_ == kSaveFPRegs);
      // kReturnRegister0 should have been saved before entering the stub.
      int bytes = __ PushCallerSaved(fp_mode_, kReturnRegister0);
      DCHECK(IsAligned(bytes, kSystemPointerSize));
      DCHECK_EQ(0, frame_access_state()->sp_delta());
      frame_access_state()->IncreaseSPDelta(bytes / kSystemPointerSize);
      DCHECK(!caller_registers_saved_);
      caller_registers_saved_ = true;
      break;
    }
    case kArchRestoreCallerRegisters: {
      DCHECK(fp_mode_ ==
             static_cast<SaveFPRegsMode>(MiscField::decode(instr->opcode())));
      DCHECK(fp_mode_ == kDontSaveFPRegs || fp_mode_ == kSaveFPRegs);
      // Don't overwrite the returned value.
      int bytes = __ PopCallerSaved(fp_mode_, kReturnRegister0);
      frame_access_state()->IncreaseSPDelta(-(bytes / kSystemPointerSize));
      DCHECK_EQ(0, frame_access_state()->sp_delta());
      DCHECK(caller_registers_saved_);
      caller_registers_saved_ = false;
      break;
    }
    case kArchPrepareTailCall:
      AssemblePrepareTailCall();
      break;
    case kArchCallCFunction: {
      int const num_parameters = MiscField::decode(instr->opcode());
      Label return_location;
      if (linkage()->GetIncomingDescriptor()->IsWasmCapiFunction()) {
        // Put the return address in a stack slot.
        __ StoreReturnAddressInWasmExitFrame(&return_location);
      }

      if (instr->InputAt(0)->IsImmediate()) {
        ExternalReference ref = i.InputExternalReference(0);
        __ CallCFunction(ref, num_parameters, 0);
      } else {
        Register func = i.InputRegister(0);
        __ CallCFunction(func, num_parameters, 0);
      }
      __ Bind(&return_location);
      if (linkage()->GetIncomingDescriptor()->IsWasmCapiFunction()) {
        RecordSafepoint(instr->reference_map(), Safepoint::kNoLazyDeopt);
      }
      frame_access_state()->SetFrameAccessToDefault();
      // Ideally, we should decrement SP delta to match the change of stack
      // pointer in CallCFunction. However, for certain architectures (e.g.
      // ARM), there may be more strict alignment requirement, causing old SP
      // to be saved on the stack. In those cases, we can not calculate the SP
      // delta statically.
      frame_access_state()->ClearSPDelta();
      if (caller_registers_saved_) {
        // Need to re-sync SP delta introduced in kArchSaveCallerRegisters.
        // Here, we assume the sequence to be:
        //   kArchSaveCallerRegisters;
        //   kArchCallCFunction;
        //   kArchRestoreCallerRegisters;
        int bytes =
            __ RequiredStackSizeForCallerSaved(fp_mode_, kReturnRegister0);
        frame_access_state()->IncreaseSPDelta(bytes / kSystemPointerSize);
      }
      break;
    }
    case kArchJmp:
      AssembleArchJump(i.InputRpo(0));
      break;
    case kArchTableSwitch:
      AssembleArchTableSwitch(instr);
      break;
    case kArchBinarySearchSwitch:
      AssembleArchBinarySearchSwitch(instr);
      break;
    case kArchAbortCSAAssert:
      DCHECK_EQ(i.InputRegister(0), x1);
      {
        // We don't actually want to generate a pile of code for this, so just
        // claim there is a stack frame, without generating one.
        FrameScope scope(tasm(), StackFrame::NONE);
        __ Call(
            isolate()->builtins()->builtin_handle(Builtins::kAbortCSAAssert),
            RelocInfo::CODE_TARGET);
      }
      __ Debug("kArchAbortCSAAssert", 0, BREAK);
      unwinding_info_writer_.MarkBlockWillExit();
      break;
    case kArchDebugBreak:
      __ DebugBreak();
      break;
    case kArchComment:
      __ RecordComment(reinterpret_cast<const char*>(i.InputInt64(0)));
      break;
    case kArchThrowTerminator:
      unwinding_info_writer_.MarkBlockWillExit();
      break;
    case kArchNop:
      // don't emit code for nops.
      break;
    case kArchDeoptimize: {
      DeoptimizationExit* exit =
          BuildTranslation(instr, -1, 0, OutputFrameStateCombine::Ignore());
      __ B(exit->label());
      break;
    }
    case kArchRet:
      AssembleReturn(instr->InputAt(0));
      break;
    case kArchFramePointer:
      __ mov(i.OutputRegister(), fp);
      break;
    case kArchParentFramePointer:
      if (frame_access_state()->has_frame()) {
        __ ldr(i.OutputRegister(), MemOperand(fp, 0));
      } else {
        __ mov(i.OutputRegister(), fp);
      }
      break;
    case kArchStackPointerGreaterThan: {
      // Potentially apply an offset to the current stack pointer before the
      // comparison to consider the size difference of an optimized frame
      // versus the contained unoptimized frames.

      Register lhs_register = sp;
      uint32_t offset;

      if (ShouldApplyOffsetToStackCheck(instr, &offset)) {
        lhs_register = i.TempRegister(0);
        __ Sub(lhs_register, sp, offset);
      }

      constexpr size_t kValueIndex = 0;
      DCHECK(instr->InputAt(kValueIndex)->IsRegister());
      __ Cmp(lhs_register, i.InputRegister(kValueIndex));
      break;
    }
    case kArchStackCheckOffset:
      __ Move(i.OutputRegister(), Smi::FromInt(GetStackCheckOffset()));
      break;
    case kArchTruncateDoubleToI:
      __ TruncateDoubleToI(isolate(), zone(), i.OutputRegister(),
                           i.InputDoubleRegister(0), DetermineStubCallMode(),
                           frame_access_state()->has_frame()
                               ? kLRHasBeenSaved
                               : kLRHasNotBeenSaved);

      break;
    case kArchStoreWithWriteBarrier: {
      RecordWriteMode mode =
          static_cast<RecordWriteMode>(MiscField::decode(instr->opcode()));
      AddressingMode addressing_mode =
          AddressingModeField::decode(instr->opcode());
      Register object = i.InputRegister(0);
      Operand offset(0);
      if (addressing_mode == kMode_MRI) {
        offset = Operand(i.InputInt64(1));
      } else {
        DCHECK_EQ(addressing_mode, kMode_MRR);
        offset = Operand(i.InputRegister(1));
      }
      Register value = i.InputRegister(2);
      auto ool = zone()->New<OutOfLineRecordWrite>(
          this, object, offset, value, mode, DetermineStubCallMode(),
          &unwinding_info_writer_);
      __ StoreTaggedField(value, MemOperand(object, offset));
      __ CheckPageFlag(object, MemoryChunk::kPointersFromHereAreInterestingMask,
                       eq, ool->entry());
      __ Bind(ool->exit());
      break;
    }
    case kArchStackSlot: {
      FrameOffset offset =
          frame_access_state()->GetFrameOffset(i.InputInt32(0));
      Register base = offset.from_stack_pointer() ? sp : fp;
      __ Add(i.OutputRegister(0), base, Operand(offset.offset()));
      break;
    }
    case kIeee754Float64Acos:
      ASSEMBLE_IEEE754_UNOP(acos);
      break;
    case kIeee754Float64Acosh:
      ASSEMBLE_IEEE754_UNOP(acosh);
      break;
    case kIeee754Float64Asin:
      ASSEMBLE_IEEE754_UNOP(asin);
      break;
    case kIeee754Float64Asinh:
      ASSEMBLE_IEEE754_UNOP(asinh);
      break;
    case kIeee754Float64Atan:
      ASSEMBLE_IEEE754_UNOP(atan);
      break;
    case kIeee754Float64Atanh:
      ASSEMBLE_IEEE754_UNOP(atanh);
      break;
    case kIeee754Float64Atan2:
      ASSEMBLE_IEEE754_BINOP(atan2);
      break;
    case kIeee754Float64Cos:
      ASSEMBLE_IEEE754_UNOP(cos);
      break;
    case kIeee754Float64Cosh:
      ASSEMBLE_IEEE754_UNOP(cosh);
      break;
    case kIeee754Float64Cbrt:
      ASSEMBLE_IEEE754_UNOP(cbrt);
      break;
    case kIeee754Float64Exp:
      ASSEMBLE_IEEE754_UNOP(exp);
      break;
    case kIeee754Float64Expm1:
      ASSEMBLE_IEEE754_UNOP(expm1);
      break;
    case kIeee754Float64Log:
      ASSEMBLE_IEEE754_UNOP(log);
      break;
    case kIeee754Float64Log1p:
      ASSEMBLE_IEEE754_UNOP(log1p);
      break;
    case kIeee754Float64Log2:
      ASSEMBLE_IEEE754_UNOP(log2);
      break;
    case kIeee754Float64Log10:
      ASSEMBLE_IEEE754_UNOP(log10);
      break;
    case kIeee754Float64Pow:
      ASSEMBLE_IEEE754_BINOP(pow);
      break;
    case kIeee754Float64Sin:
      ASSEMBLE_IEEE754_UNOP(sin);
      break;
    case kIeee754Float64Sinh:
      ASSEMBLE_IEEE754_UNOP(sinh);
      break;
    case kIeee754Float64Tan:
      ASSEMBLE_IEEE754_UNOP(tan);
      break;
    case kIeee754Float64Tanh:
      ASSEMBLE_IEEE754_UNOP(tanh);
      break;
    case kArm64Float32RoundDown:
      EmitFpOrNeonUnop(tasm(), &TurboAssembler::Frintm, instr, i, kFormatS,
                       kFormat4S);
      break;
    case kArm64Float64RoundDown:
      EmitFpOrNeonUnop(tasm(), &TurboAssembler::Frintm, instr, i, kFormatD,
                       kFormat2D);
      break;
    case kArm64Float32RoundUp:
      EmitFpOrNeonUnop(tasm(), &TurboAssembler::Frintp, instr, i, kFormatS,
                       kFormat4S);
      break;
    case kArm64Float64RoundUp:
      EmitFpOrNeonUnop(tasm(), &TurboAssembler::Frintp, instr, i, kFormatD,
                       kFormat2D);
      break;
    case kArm64Float64RoundTiesAway:
      EmitFpOrNeonUnop(tasm(), &TurboAssembler::Frinta, instr, i, kFormatD,
                       kFormat2D);
      break;
    case kArm64Float32RoundTruncate:
      EmitFpOrNeonUnop(tasm(), &TurboAssembler::Frintz, instr, i, kFormatS,
                       kFormat4S);
      break;
    case kArm64Float64RoundTruncate:
      EmitFpOrNeonUnop(tasm(), &TurboAssembler::Frintz, instr, i, kFormatD,
                       kFormat2D);
      break;
    case kArm64Float32RoundTiesEven:
      EmitFpOrNeonUnop(tasm(), &TurboAssembler::Frintn, instr, i, kFormatS,
                       kFormat4S);
      break;
    case kArm64Float64RoundTiesEven:
      EmitFpOrNeonUnop(tasm(), &TurboAssembler::Frintn, instr, i, kFormatD,
                       kFormat2D);
      break;
    case kArm64Add:
      if (FlagsModeField::decode(opcode) != kFlags_none) {
        __ Adds(i.OutputRegister(), i.InputOrZeroRegister64(0),
                i.InputOperand2_64(1));
      } else {
        __ Add(i.OutputRegister(), i.InputOrZeroRegister64(0),
               i.InputOperand2_64(1));
      }
      break;
    case kArm64Add32:
      if (FlagsModeField::decode(opcode) != kFlags_none) {
        __ Adds(i.OutputRegister32(), i.InputOrZeroRegister32(0),
                i.InputOperand2_32(1));
      } else {
        __ Add(i.OutputRegister32(), i.InputOrZeroRegister32(0),
               i.InputOperand2_32(1));
      }
      break;
    case kArm64And:
      if (FlagsModeField::decode(opcode) != kFlags_none) {
        // The ands instruction only sets N and Z, so only the following
        // conditions make sense.
        DCHECK(FlagsConditionField::decode(opcode) == kEqual ||
               FlagsConditionField::decode(opcode) == kNotEqual ||
               FlagsConditionField::decode(opcode) == kPositiveOrZero ||
               FlagsConditionField::decode(opcode) == kNegative);
        __ Ands(i.OutputRegister(), i.InputOrZeroRegister64(0),
                i.InputOperand2_64(1));
      } else {
        __ And(i.OutputRegister(), i.InputOrZeroRegister64(0),
               i.InputOperand2_64(1));
      }
      break;
    case kArm64And32:
      if (FlagsModeField::decode(opcode) != kFlags_none) {
        // The ands instruction only sets N and Z, so only the following
        // conditions make sense.
        DCHECK(FlagsConditionField::decode(opcode) == kEqual ||
               FlagsConditionField::decode(opcode) == kNotEqual ||
               FlagsConditionField::decode(opcode) == kPositiveOrZero ||
               FlagsConditionField::decode(opcode) == kNegative);
        __ Ands(i.OutputRegister32(), i.InputOrZeroRegister32(0),
                i.InputOperand2_32(1));
      } else {
        __ And(i.OutputRegister32(), i.InputOrZeroRegister32(0),
               i.InputOperand2_32(1));
      }
      break;
    case kArm64Bic:
      __ Bic(i.OutputRegister(), i.InputOrZeroRegister64(0),
             i.InputOperand2_64(1));
      break;
    case kArm64Bic32:
      __ Bic(i.OutputRegister32(), i.InputOrZeroRegister32(0),
             i.InputOperand2_32(1));
      break;
    case kArm64Mul:
      __ Mul(i.OutputRegister(), i.InputRegister(0), i.InputRegister(1));
      break;
    case kArm64Mul32:
      __ Mul(i.OutputRegister32(), i.InputRegister32(0), i.InputRegister32(1));
      break;
    case kArm64Saddlp: {
      VectorFormat dst_f = VectorFormatFillQ(MiscField::decode(opcode));
      VectorFormat src_f = VectorFormatHalfWidthDoubleLanes(dst_f);
      __ Saddlp(i.OutputSimd128Register().Format(dst_f),
                i.InputSimd128Register(0).Format(src_f));
      break;
    }
    case kArm64Uaddlp: {
      VectorFormat dst_f = VectorFormatFillQ(MiscField::decode(opcode));
      VectorFormat src_f = VectorFormatHalfWidthDoubleLanes(dst_f);
      __ Uaddlp(i.OutputSimd128Register().Format(dst_f),
                i.InputSimd128Register(0).Format(src_f));
      break;
    }
    case kArm64Smull: {
      if (instr->InputAt(0)->IsRegister()) {
        __ Smull(i.OutputRegister(), i.InputRegister32(0),
                 i.InputRegister32(1));
      } else {
        DCHECK(instr->InputAt(0)->IsSimd128Register());
        VectorFormat dst_f = VectorFormatFillQ(MiscField::decode(opcode));
        VectorFormat src_f = VectorFormatHalfWidth(dst_f);
        __ Smull(i.OutputSimd128Register().Format(dst_f),
                 i.InputSimd128Register(0).Format(src_f),
                 i.InputSimd128Register(1).Format(src_f));
      }
      break;
    }
    case kArm64Smull2: {
      VectorFormat dst_f = VectorFormatFillQ(MiscField::decode(opcode));
      VectorFormat src_f = VectorFormatHalfWidthDoubleLanes(dst_f);
      __ Smull2(i.OutputSimd128Register().Format(dst_f),
                i.InputSimd128Register(0).Format(src_f),
                i.InputSimd128Register(1).Format(src_f));
      break;
    }
    case kArm64Umull: {
      if (instr->InputAt(0)->IsRegister()) {
        __ Umull(i.OutputRegister(), i.InputRegister32(0),
                 i.InputRegister32(1));
      } else {
        DCHECK(instr->InputAt(0)->IsSimd128Register());
        VectorFormat dst_f = VectorFormatFillQ(MiscField::decode(opcode));
        VectorFormat src_f = VectorFormatHalfWidth(dst_f);
        __ Umull(i.OutputSimd128Register().Format(dst_f),
                 i.InputSimd128Register(0).Format(src_f),
                 i.InputSimd128Register(1).Format(src_f));
      }
      break;
    }
    case kArm64Umull2: {
      VectorFormat dst_f = VectorFormatFillQ(MiscField::decode(opcode));
      VectorFormat src_f = VectorFormatHalfWidthDoubleLanes(dst_f);
      __ Umull2(i.OutputSimd128Register().Format(dst_f),
                i.InputSimd128Register(0).Format(src_f),
                i.InputSimd128Register(1).Format(src_f));
      break;
    }
    case kArm64Madd:
      __ Madd(i.OutputRegister(), i.InputRegister(0), i.InputRegister(1),
              i.InputRegister(2));
      break;
    case kArm64Madd32:
      __ Madd(i.OutputRegister32(), i.InputRegister32(0), i.InputRegister32(1),
              i.InputRegister32(2));
      break;
    case kArm64Msub:
      __ Msub(i.OutputRegister(), i.InputRegister(0), i.InputRegister(1),
              i.InputRegister(2));
      break;
    case kArm64Msub32:
      __ Msub(i.OutputRegister32(), i.InputRegister32(0), i.InputRegister32(1),
              i.InputRegister32(2));
      break;
    case kArm64Mneg:
      __ Mneg(i.OutputRegister(), i.InputRegister(0), i.InputRegister(1));
      break;
    case kArm64Mneg32:
      __ Mneg(i.OutputRegister32(), i.InputRegister32(0), i.InputRegister32(1));
      break;
    case kArm64Idiv:
      __ Sdiv(i.OutputRegister(), i.InputRegister(0), i.InputRegister(1));
      break;
    case kArm64Idiv32:
      __ Sdiv(i.OutputRegister32(), i.InputRegister32(0), i.InputRegister32(1));
      break;
    case kArm64Udiv:
      __ Udiv(i.OutputRegister(), i.InputRegister(0), i.InputRegister(1));
      break;
    case kArm64Udiv32:
      __ Udiv(i.OutputRegister32(), i.InputRegister32(0), i.InputRegister32(1));
      break;
    case kArm64Imod: {
      UseScratchRegisterScope scope(tasm());
      Register temp = scope.AcquireX();
      __ Sdiv(temp, i.InputRegister(0), i.InputRegister(1));
      __ Msub(i.OutputRegister(), temp, i.InputRegister(1), i.InputRegister(0));
      break;
    }
    case kArm64Imod32: {
      UseScratchRegisterScope scope(tasm());
      Register temp = scope.AcquireW();
      __ Sdiv(temp, i.InputRegister32(0), i.InputRegister32(1));
      __ Msub(i.OutputRegister32(), temp, i.InputRegister32(1),
              i.InputRegister32(0));
      break;
    }
    case kArm64Umod: {
      UseScratchRegisterScope scope(tasm());
      Register temp = scope.AcquireX();
      __ Udiv(temp, i.InputRegister(0), i.InputRegister(1));
      __ Msub(i.OutputRegister(), temp, i.InputRegister(1), i.InputRegister(0));
      break;
    }
    case kArm64Umod32: {
      UseScratchRegisterScope scope(tasm());
      Register temp = scope.AcquireW();
      __ Udiv(temp, i.InputRegister32(0), i.InputRegister32(1));
      __ Msub(i.OutputRegister32(), temp, i.InputRegister32(1),
              i.InputRegister32(0));
      break;
    }
    case kArm64Not:
      __ Mvn(i.OutputRegister(), i.InputOperand(0));
      break;
    case kArm64Not32:
      __ Mvn(i.OutputRegister32(), i.InputOperand32(0));
      break;
    case kArm64Or:
      __ Orr(i.OutputRegister(), i.InputOrZeroRegister64(0),
             i.InputOperand2_64(1));
      break;
    case kArm64Or32:
      __ Orr(i.OutputRegister32(), i.InputOrZeroRegister32(0),
             i.InputOperand2_32(1));
      break;
    case kArm64Orn:
      __ Orn(i.OutputRegister(), i.InputOrZeroRegister64(0),
             i.InputOperand2_64(1));
      break;
    case kArm64Orn32:
      __ Orn(i.OutputRegister32(), i.InputOrZeroRegister32(0),
             i.InputOperand2_32(1));
      break;
    case kArm64Eor:
      __ Eor(i.OutputRegister(), i.InputOrZeroRegister64(0),
             i.InputOperand2_64(1));
      break;
    case kArm64Eor32:
      __ Eor(i.OutputRegister32(), i.InputOrZeroRegister32(0),
             i.InputOperand2_32(1));
      break;
    case kArm64Eon:
      __ Eon(i.OutputRegister(), i.InputOrZeroRegister64(0),
             i.InputOperand2_64(1));
      break;
    case kArm64Eon32:
      __ Eon(i.OutputRegister32(), i.InputOrZeroRegister32(0),
             i.InputOperand2_32(1));
      break;
    case kArm64Sub:
      if (FlagsModeField::decode(opcode) != kFlags_none) {
        __ Subs(i.OutputRegister(), i.InputOrZeroRegister64(0),
                i.InputOperand2_64(1));
      } else {
        __ Sub(i.OutputRegister(), i.InputOrZeroRegister64(0),
               i.InputOperand2_64(1));
      }
      break;
    case kArm64Sub32:
      if (FlagsModeField::decode(opcode) != kFlags_none) {
        __ Subs(i.OutputRegister32(), i.InputOrZeroRegister32(0),
                i.InputOperand2_32(1));
      } else {
        __ Sub(i.OutputRegister32(), i.InputOrZeroRegister32(0),
               i.InputOperand2_32(1));
      }
      break;
    case kArm64Lsl:
      ASSEMBLE_SHIFT(Lsl, 64);
      break;
    case kArm64Lsl32:
      ASSEMBLE_SHIFT(Lsl, 32);
      break;
    case kArm64Lsr:
      ASSEMBLE_SHIFT(Lsr, 64);
      break;
    case kArm64Lsr32:
      ASSEMBLE_SHIFT(Lsr, 32);
      break;
    case kArm64Asr:
      ASSEMBLE_SHIFT(Asr, 64);
      break;
    case kArm64Asr32:
      ASSEMBLE_SHIFT(Asr, 32);
      break;
    case kArm64Ror:
      ASSEMBLE_SHIFT(Ror, 64);
      break;
    case kArm64Ror32:
      ASSEMBLE_SHIFT(Ror, 32);
      break;
    case kArm64Mov32:
      __ Mov(i.OutputRegister32(), i.InputRegister32(0));
      break;
    case kArm64Sxtb32:
      __ Sxtb(i.OutputRegister32(), i.InputRegister32(0));
      break;
    case kArm64Sxth32:
      __ Sxth(i.OutputRegister32(), i.InputRegister32(0));
      break;
    case kArm64Sxtb:
      __ Sxtb(i.OutputRegister(), i.InputRegister32(0));
      break;
    case kArm64Sxth:
      __ Sxth(i.OutputRegister(), i.InputRegister32(0));
      break;
    case kArm64Sxtw:
      __ Sxtw(i.OutputRegister(), i.InputRegister32(0));
      break;
    case kArm64Sbfx:
      __ Sbfx(i.OutputRegister(), i.InputRegister(0), i.InputInt6(1),
              i.InputInt6(2));
      break;
    case kArm64Sbfx32:
      __ Sbfx(i.OutputRegister32(), i.InputRegister32(0), i.InputInt5(1),
              i.InputInt5(2));
      break;
    case kArm64Ubfx:
      __ Ubfx(i.OutputRegister(), i.InputRegister(0), i.InputInt6(1),
              i.InputInt32(2));
      break;
    case kArm64Ubfx32:
      __ Ubfx(i.OutputRegister32(), i.InputRegister32(0), i.InputInt5(1),
              i.InputInt32(2));
      break;
    case kArm64Ubfiz32:
      __ Ubfiz(i.OutputRegister32(), i.InputRegister32(0), i.InputInt5(1),
               i.InputInt5(2));
      break;
    case kArm64Bfi:
      __ Bfi(i.OutputRegister(), i.InputRegister(1), i.InputInt6(2),
             i.InputInt6(3));
      break;
    case kArm64TestAndBranch32:
    case kArm64TestAndBranch:
      // Pseudo instructions turned into tbz/tbnz in AssembleArchBranch.
      break;
    case kArm64CompareAndBranch32:
    case kArm64CompareAndBranch:
      // Pseudo instruction handled in AssembleArchBranch.
      break;
    case kArm64Claim: {
      int count = i.InputInt32(0);
      DCHECK_EQ(count % 2, 0);
      __ AssertSpAligned();
      if (count > 0) {
        __ Claim(count);
        frame_access_state()->IncreaseSPDelta(count);
      }
      break;
    }
    case kArm64Poke: {
      Operand operand(i.InputInt32(1) * kSystemPointerSize);
      if (instr->InputAt(0)->IsSimd128Register()) {
        __ Poke(i.InputSimd128Register(0), operand);
      } else if (instr->InputAt(0)->IsFPRegister()) {
        __ Poke(i.InputFloat64Register(0), operand);
      } else {
        __ Poke(i.InputOrZeroRegister64(0), operand);
      }
      break;
    }
    case kArm64PokePair: {
      int slot = i.InputInt32(2) - 1;
      if (instr->InputAt(0)->IsFPRegister()) {
        __ PokePair(i.InputFloat64Register(1), i.InputFloat64Register(0),
                    slot * kSystemPointerSize);
      } else {
        __ PokePair(i.InputRegister(1), i.InputRegister(0),
                    slot * kSystemPointerSize);
      }
      break;
    }
    case kArm64Peek: {
      int reverse_slot = i.InputInt32(0);
      int offset =
          FrameSlotToFPOffset(frame()->GetTotalFrameSlotCount() - reverse_slot);
      if (instr->OutputAt(0)->IsFPRegister()) {
        LocationOperand* op = LocationOperand::cast(instr->OutputAt(0));
        if (op->representation() == MachineRepresentation::kFloat64) {
          __ Ldr(i.OutputDoubleRegister(), MemOperand(fp, offset));
        } else if (op->representation() == MachineRepresentation::kFloat32) {
          __ Ldr(i.OutputFloatRegister(), MemOperand(fp, offset));
        } else {
          DCHECK_EQ(MachineRepresentation::kSimd128, op->representation());
          __ Ldr(i.OutputSimd128Register(), MemOperand(fp, offset));
        }
      } else {
        __ Ldr(i.OutputRegister(), MemOperand(fp, offset));
      }
      break;
    }
1445 case kArm64Clz:
1446 __ Clz(i.OutputRegister64(), i.InputRegister64(0));
1447 break;
1448 case kArm64Clz32:
1449 __ Clz(i.OutputRegister32(), i.InputRegister32(0));
1450 break;
1451 case kArm64Rbit:
1452 __ Rbit(i.OutputRegister64(), i.InputRegister64(0));
1453 break;
1454 case kArm64Rbit32:
1455 __ Rbit(i.OutputRegister32(), i.InputRegister32(0));
1456 break;
1457 case kArm64Rev:
1458 __ Rev(i.OutputRegister64(), i.InputRegister64(0));
1459 break;
1460 case kArm64Rev32:
1461 __ Rev(i.OutputRegister32(), i.InputRegister32(0));
1462 break;
1463 case kArm64Cmp:
1464 __ Cmp(i.InputOrZeroRegister64(0), i.InputOperand2_64(1));
1465 break;
1466 case kArm64Cmp32:
1467 __ Cmp(i.InputOrZeroRegister32(0), i.InputOperand2_32(1));
1468 break;
1469 case kArm64Cmn:
1470 __ Cmn(i.InputOrZeroRegister64(0), i.InputOperand2_64(1));
1471 break;
1472 case kArm64Cmn32:
1473 __ Cmn(i.InputOrZeroRegister32(0), i.InputOperand2_32(1));
1474 break;
1475 case kArm64Cnt: {
1476 VectorFormat f = VectorFormatFillQ(MiscField::decode(opcode));
1477 __ Cnt(i.OutputSimd128Register().Format(f),
1478 i.InputSimd128Register(0).Format(f));
1479 break;
1480 }
1481 case kArm64Tst:
1482 __ Tst(i.InputOrZeroRegister64(0), i.InputOperand2_64(1));
1483 break;
1484 case kArm64Tst32:
1485 __ Tst(i.InputOrZeroRegister32(0), i.InputOperand2_32(1));
1486 break;
1487 case kArm64Float32Cmp:
1488 if (instr->InputAt(1)->IsFPRegister()) {
1489 __ Fcmp(i.InputFloat32Register(0), i.InputFloat32Register(1));
1490 } else {
1491 DCHECK(instr->InputAt(1)->IsImmediate());
1492 // 0.0 is the only immediate supported by fcmp instructions.
1493 DCHECK_EQ(0.0f, i.InputFloat32(1));
1494 __ Fcmp(i.InputFloat32Register(0), i.InputFloat32(1));
1495 }
1496 break;
1497 case kArm64Float32Add:
1498 __ Fadd(i.OutputFloat32Register(), i.InputFloat32Register(0),
1499 i.InputFloat32Register(1));
1500 break;
1501 case kArm64Float32Sub:
1502 __ Fsub(i.OutputFloat32Register(), i.InputFloat32Register(0),
1503 i.InputFloat32Register(1));
1504 break;
1505 case kArm64Float32Mul:
1506 __ Fmul(i.OutputFloat32Register(), i.InputFloat32Register(0),
1507 i.InputFloat32Register(1));
1508 break;
1509 case kArm64Float32Div:
1510 __ Fdiv(i.OutputFloat32Register(), i.InputFloat32Register(0),
1511 i.InputFloat32Register(1));
1512 break;
1513 case kArm64Float32Abs:
1514 __ Fabs(i.OutputFloat32Register(), i.InputFloat32Register(0));
1515 break;
1516 case kArm64Float32Neg:
1517 __ Fneg(i.OutputFloat32Register(), i.InputFloat32Register(0));
1518 break;
1519 case kArm64Float32Sqrt:
1520 __ Fsqrt(i.OutputFloat32Register(), i.InputFloat32Register(0));
1521 break;
1522 case kArm64Float32Fnmul: {
1523 __ Fnmul(i.OutputFloat32Register(), i.InputFloat32Register(0),
1524 i.InputFloat32Register(1));
1525 break;
1526 }
1527 case kArm64Float64Cmp:
1528 if (instr->InputAt(1)->IsFPRegister()) {
1529 __ Fcmp(i.InputDoubleRegister(0), i.InputDoubleRegister(1));
1530 } else {
1531 DCHECK(instr->InputAt(1)->IsImmediate());
1532 // 0.0 is the only immediate supported by fcmp instructions.
1533 DCHECK_EQ(0.0, i.InputDouble(1));
1534 __ Fcmp(i.InputDoubleRegister(0), i.InputDouble(1));
1535 }
1536 break;
1537 case kArm64Float64Add:
1538 __ Fadd(i.OutputDoubleRegister(), i.InputDoubleRegister(0),
1539 i.InputDoubleRegister(1));
1540 break;
1541 case kArm64Float64Sub:
1542 __ Fsub(i.OutputDoubleRegister(), i.InputDoubleRegister(0),
1543 i.InputDoubleRegister(1));
1544 break;
1545 case kArm64Float64Mul:
1546 __ Fmul(i.OutputDoubleRegister(), i.InputDoubleRegister(0),
1547 i.InputDoubleRegister(1));
1548 break;
1549 case kArm64Float64Div:
1550 __ Fdiv(i.OutputDoubleRegister(), i.InputDoubleRegister(0),
1551 i.InputDoubleRegister(1));
1552 break;
1553 case kArm64Float64Mod: {
1554 // TODO(turbofan): implement directly.
1555 FrameScope scope(tasm(), StackFrame::MANUAL);
1556 DCHECK_EQ(d0, i.InputDoubleRegister(0));
1557 DCHECK_EQ(d1, i.InputDoubleRegister(1));
1558 DCHECK_EQ(d0, i.OutputDoubleRegister());
1559 // TODO(turbofan): make sure this saves all relevant registers.
1560 __ CallCFunction(ExternalReference::mod_two_doubles_operation(), 0, 2);
1561 break;
1562 }
1563 case kArm64Float32Max: {
1564 __ Fmax(i.OutputFloat32Register(), i.InputFloat32Register(0),
1565 i.InputFloat32Register(1));
1566 break;
1567 }
1568 case kArm64Float64Max: {
1569 __ Fmax(i.OutputDoubleRegister(), i.InputDoubleRegister(0),
1570 i.InputDoubleRegister(1));
1571 break;
1572 }
1573 case kArm64Float32Min: {
1574 __ Fmin(i.OutputFloat32Register(), i.InputFloat32Register(0),
1575 i.InputFloat32Register(1));
1576 break;
1577 }
1578 case kArm64Float64Min: {
1579 __ Fmin(i.OutputDoubleRegister(), i.InputDoubleRegister(0),
1580 i.InputDoubleRegister(1));
1581 break;
1582 }
1583 case kArm64Float64Abs:
1584 __ Fabs(i.OutputDoubleRegister(), i.InputDoubleRegister(0));
1585 break;
1586 case kArm64Float64Neg:
1587 __ Fneg(i.OutputDoubleRegister(), i.InputDoubleRegister(0));
1588 break;
1589 case kArm64Float64Sqrt:
1590 __ Fsqrt(i.OutputDoubleRegister(), i.InputDoubleRegister(0));
1591 break;
1592 case kArm64Float64Fnmul:
1593 __ Fnmul(i.OutputDoubleRegister(), i.InputDoubleRegister(0),
1594 i.InputDoubleRegister(1));
1595 break;
1596 case kArm64Float32ToFloat64:
1597 __ Fcvt(i.OutputDoubleRegister(), i.InputDoubleRegister(0).S());
1598 break;
1599 case kArm64Float64ToFloat32:
1600 __ Fcvt(i.OutputDoubleRegister().S(), i.InputDoubleRegister(0));
1601 break;
1602 case kArm64Float32ToInt32: {
1603 __ Fcvtzs(i.OutputRegister32(), i.InputFloat32Register(0));
1604 bool set_overflow_to_min_i32 = MiscField::decode(instr->opcode());
1605 if (set_overflow_to_min_i32) {
1606 // Avoid INT32_MAX as an overflow indicator and use INT32_MIN instead,
1607 // because INT32_MIN allows easier out-of-bounds detection.
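        // Roughly: Fcvtzs saturates out-of-range inputs to INT32_MIN or
        // INT32_MAX. Cmn(out, 1) sets the V flag only for out == INT32_MAX,
        // so the Csinc below leaves in-range results untouched (vc) and
        // bumps a saturated INT32_MAX to INT32_MIN.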
1608 __ Cmn(i.OutputRegister32(), 1);
1609 __ Csinc(i.OutputRegister32(), i.OutputRegister32(),
1610 i.OutputRegister32(), vc);
1611 }
1612 break;
1613 }
1614 case kArm64Float64ToInt32:
1615 __ Fcvtzs(i.OutputRegister32(), i.InputDoubleRegister(0));
1616 break;
1617 case kArm64Float32ToUint32: {
1618 __ Fcvtzu(i.OutputRegister32(), i.InputFloat32Register(0));
1619 bool set_overflow_to_min_u32 = MiscField::decode(instr->opcode());
1620 if (set_overflow_to_min_u32) {
1621 // Avoid UINT32_MAX as an overflow indicator and use 0 instead,
1622 // because 0 allows easier out-of-bounds detection.
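        // Roughly: Fcvtzu saturates out-of-range inputs to 0 or UINT32_MAX.
        // Cmn(out, 1) sets the carry flag only for out == UINT32_MAX, and
        // Adc then adds that carry, wrapping the saturated value to 0.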
1623 __ Cmn(i.OutputRegister32(), 1);
1624 __ Adc(i.OutputRegister32(), i.OutputRegister32(), Operand(0));
1625 }
1626 break;
1627 }
1628 case kArm64Float64ToUint32:
1629 __ Fcvtzu(i.OutputRegister32(), i.InputDoubleRegister(0));
1630 break;
1631 case kArm64Float32ToInt64:
1632 __ Fcvtzs(i.OutputRegister64(), i.InputFloat32Register(0));
1633 if (i.OutputCount() > 1) {
1634 // Check for inputs below INT64_MIN and NaN.
1635 __ Fcmp(i.InputFloat32Register(0), static_cast<float>(INT64_MIN));
1636         // Check overflow.
1637         // Ccmp compares the result with -1: subtracting -1 overflows
1638         // exactly when the result is INT64_MAX, i.e. on saturation.
1639 // OutputRegister(1) is set to 0 if the input was out of range or NaN.
1640 __ Ccmp(i.OutputRegister(0), -1, VFlag, ge);
1641 __ Cset(i.OutputRegister(1), vc);
1642 }
1643 break;
1644 case kArm64Float64ToInt64:
1645 __ Fcvtzs(i.OutputRegister(0), i.InputDoubleRegister(0));
1646 if (i.OutputCount() > 1) {
1647 // See kArm64Float32ToInt64 for a detailed description.
1648 __ Fcmp(i.InputDoubleRegister(0), static_cast<double>(INT64_MIN));
1649 __ Ccmp(i.OutputRegister(0), -1, VFlag, ge);
1650 __ Cset(i.OutputRegister(1), vc);
1651 }
1652 break;
1653 case kArm64Float32ToUint64:
1654 __ Fcvtzu(i.OutputRegister64(), i.InputFloat32Register(0));
1655 if (i.OutputCount() > 1) {
1656 // See kArm64Float32ToInt64 for a detailed description.
1657 __ Fcmp(i.InputFloat32Register(0), -1.0);
1658 __ Ccmp(i.OutputRegister(0), -1, ZFlag, gt);
1659 __ Cset(i.OutputRegister(1), ne);
1660 }
1661 break;
1662 case kArm64Float64ToUint64:
1663 __ Fcvtzu(i.OutputRegister64(), i.InputDoubleRegister(0));
1664 if (i.OutputCount() > 1) {
1665 // See kArm64Float32ToInt64 for a detailed description.
1666 __ Fcmp(i.InputDoubleRegister(0), -1.0);
1667 __ Ccmp(i.OutputRegister(0), -1, ZFlag, gt);
1668 __ Cset(i.OutputRegister(1), ne);
1669 }
1670 break;
1671 case kArm64Int32ToFloat32:
1672 __ Scvtf(i.OutputFloat32Register(), i.InputRegister32(0));
1673 break;
1674 case kArm64Int32ToFloat64:
1675 __ Scvtf(i.OutputDoubleRegister(), i.InputRegister32(0));
1676 break;
1677 case kArm64Int64ToFloat32:
1678 __ Scvtf(i.OutputDoubleRegister().S(), i.InputRegister64(0));
1679 break;
1680 case kArm64Int64ToFloat64:
1681 __ Scvtf(i.OutputDoubleRegister(), i.InputRegister64(0));
1682 break;
1683 case kArm64Uint32ToFloat32:
1684 __ Ucvtf(i.OutputFloat32Register(), i.InputRegister32(0));
1685 break;
1686 case kArm64Uint32ToFloat64:
1687 __ Ucvtf(i.OutputDoubleRegister(), i.InputRegister32(0));
1688 break;
1689 case kArm64Uint64ToFloat32:
1690 __ Ucvtf(i.OutputDoubleRegister().S(), i.InputRegister64(0));
1691 break;
1692 case kArm64Uint64ToFloat64:
1693 __ Ucvtf(i.OutputDoubleRegister(), i.InputRegister64(0));
1694 break;
1695 case kArm64Float64ExtractLowWord32:
1696 __ Fmov(i.OutputRegister32(), i.InputFloat32Register(0));
1697 break;
1698 case kArm64Float64ExtractHighWord32:
1699 __ Umov(i.OutputRegister32(), i.InputFloat64Register(0).V2S(), 1);
1700 break;
1701 case kArm64Float64InsertLowWord32:
1702 DCHECK_EQ(i.OutputFloat64Register(), i.InputFloat64Register(0));
1703 __ Ins(i.OutputFloat64Register().V2S(), 0, i.InputRegister32(1));
1704 break;
1705 case kArm64Float64InsertHighWord32:
1706 DCHECK_EQ(i.OutputFloat64Register(), i.InputFloat64Register(0));
1707 __ Ins(i.OutputFloat64Register().V2S(), 1, i.InputRegister32(1));
1708 break;
1709 case kArm64Float64MoveU64:
1710 __ Fmov(i.OutputFloat64Register(), i.InputRegister(0));
1711 break;
1712 case kArm64Float64SilenceNaN:
1713 __ CanonicalizeNaN(i.OutputDoubleRegister(), i.InputDoubleRegister(0));
1714 break;
1715 case kArm64U64MoveFloat64:
1716 __ Fmov(i.OutputRegister(), i.InputDoubleRegister(0));
1717 break;
1718 case kArm64Ldrb:
1719 __ Ldrb(i.OutputRegister(), i.MemoryOperand());
1720 EmitWordLoadPoisoningIfNeeded(this, opcode, instr, i);
1721 break;
1722 case kArm64Ldrsb:
1723 __ Ldrsb(i.OutputRegister(), i.MemoryOperand());
1724 EmitWordLoadPoisoningIfNeeded(this, opcode, instr, i);
1725 break;
1726 case kArm64Strb:
1727 __ Strb(i.InputOrZeroRegister64(0), i.MemoryOperand(1));
1728 break;
1729 case kArm64Ldrh:
1730 __ Ldrh(i.OutputRegister(), i.MemoryOperand());
1731 EmitWordLoadPoisoningIfNeeded(this, opcode, instr, i);
1732 break;
1733 case kArm64Ldrsh:
1734 __ Ldrsh(i.OutputRegister(), i.MemoryOperand());
1735 EmitWordLoadPoisoningIfNeeded(this, opcode, instr, i);
1736 break;
1737 case kArm64Strh:
1738 __ Strh(i.InputOrZeroRegister64(0), i.MemoryOperand(1));
1739 break;
1740 case kArm64Ldrsw:
1741 __ Ldrsw(i.OutputRegister(), i.MemoryOperand());
1742 EmitWordLoadPoisoningIfNeeded(this, opcode, instr, i);
1743 break;
1744 case kArm64LdrW:
1745 __ Ldr(i.OutputRegister32(), i.MemoryOperand());
1746 EmitWordLoadPoisoningIfNeeded(this, opcode, instr, i);
1747 break;
1748 case kArm64StrW:
1749 __ Str(i.InputOrZeroRegister32(0), i.MemoryOperand(1));
1750 break;
1751 case kArm64Ldr:
1752 __ Ldr(i.OutputRegister(), i.MemoryOperand());
1753 EmitWordLoadPoisoningIfNeeded(this, opcode, instr, i);
1754 break;
1755 case kArm64LdrDecompressTaggedSigned:
1756 __ DecompressTaggedSigned(i.OutputRegister(), i.MemoryOperand());
1757 EmitWordLoadPoisoningIfNeeded(this, opcode, instr, i);
1758 break;
1759 case kArm64LdrDecompressTaggedPointer:
1760 __ DecompressTaggedPointer(i.OutputRegister(), i.MemoryOperand());
1761 EmitWordLoadPoisoningIfNeeded(this, opcode, instr, i);
1762 break;
1763 case kArm64LdrDecompressAnyTagged:
1764 __ DecompressAnyTagged(i.OutputRegister(), i.MemoryOperand());
1765 EmitWordLoadPoisoningIfNeeded(this, opcode, instr, i);
1766 break;
1767 case kArm64Str:
1768 __ Str(i.InputOrZeroRegister64(0), i.MemoryOperand(1));
1769 break;
1770 case kArm64StrCompressTagged:
1771 __ StoreTaggedField(i.InputOrZeroRegister64(0), i.MemoryOperand(1));
1772 break;
1773 case kArm64LdrS:
1774 EmitMaybePoisonedFPLoad(this, opcode, &i, i.OutputDoubleRegister().S());
1775 break;
1776 case kArm64StrS:
1777 __ Str(i.InputFloat32OrZeroRegister(0), i.MemoryOperand(1));
1778 break;
1779 case kArm64LdrD:
1780 EmitMaybePoisonedFPLoad(this, opcode, &i, i.OutputDoubleRegister());
1781 break;
1782 case kArm64StrD:
1783 __ Str(i.InputFloat64OrZeroRegister(0), i.MemoryOperand(1));
1784 break;
1785 case kArm64LdrQ:
1786 __ Ldr(i.OutputSimd128Register(), i.MemoryOperand());
1787 break;
1788 case kArm64StrQ:
1789 __ Str(i.InputSimd128Register(0), i.MemoryOperand(1));
1790 break;
1791 case kArm64DmbIsh:
1792 __ Dmb(InnerShareable, BarrierAll);
1793 break;
1794 case kArm64DsbIsb:
1795 __ Dsb(FullSystem, BarrierAll);
1796 __ Isb();
1797 break;
1798 case kArchWordPoisonOnSpeculation:
1799 __ And(i.OutputRegister(0), i.InputRegister(0),
1800 Operand(kSpeculationPoisonRegister));
1801 break;
1802 case kWord32AtomicLoadInt8:
1803 ASSEMBLE_ATOMIC_LOAD_INTEGER(Ldarb, Register32);
1804 __ Sxtb(i.OutputRegister(0), i.OutputRegister(0));
1805 break;
1806 case kWord32AtomicLoadUint8:
1807 case kArm64Word64AtomicLoadUint8:
1808 ASSEMBLE_ATOMIC_LOAD_INTEGER(Ldarb, Register32);
1809 break;
1810 case kWord32AtomicLoadInt16:
1811 ASSEMBLE_ATOMIC_LOAD_INTEGER(Ldarh, Register32);
1812 __ Sxth(i.OutputRegister(0), i.OutputRegister(0));
1813 break;
1814 case kWord32AtomicLoadUint16:
1815 case kArm64Word64AtomicLoadUint16:
1816 ASSEMBLE_ATOMIC_LOAD_INTEGER(Ldarh, Register32);
1817 break;
1818 case kWord32AtomicLoadWord32:
1819 case kArm64Word64AtomicLoadUint32:
1820 ASSEMBLE_ATOMIC_LOAD_INTEGER(Ldar, Register32);
1821 break;
1822 case kArm64Word64AtomicLoadUint64:
1823 ASSEMBLE_ATOMIC_LOAD_INTEGER(Ldar, Register);
1824 break;
1825 case kWord32AtomicStoreWord8:
1826 case kArm64Word64AtomicStoreWord8:
1827 ASSEMBLE_ATOMIC_STORE_INTEGER(Stlrb, Register32);
1828 break;
1829 case kWord32AtomicStoreWord16:
1830 case kArm64Word64AtomicStoreWord16:
1831 ASSEMBLE_ATOMIC_STORE_INTEGER(Stlrh, Register32);
1832 break;
1833 case kWord32AtomicStoreWord32:
1834 case kArm64Word64AtomicStoreWord32:
1835 ASSEMBLE_ATOMIC_STORE_INTEGER(Stlr, Register32);
1836 break;
1837 case kArm64Word64AtomicStoreWord64:
1838 ASSEMBLE_ATOMIC_STORE_INTEGER(Stlr, Register);
1839 break;
1840 case kWord32AtomicExchangeInt8:
1841 ASSEMBLE_ATOMIC_EXCHANGE_INTEGER(ldaxrb, stlxrb, Register32);
1842 __ Sxtb(i.OutputRegister(0), i.OutputRegister(0));
1843 break;
1844 case kWord32AtomicExchangeUint8:
1845 case kArm64Word64AtomicExchangeUint8:
1846 ASSEMBLE_ATOMIC_EXCHANGE_INTEGER(ldaxrb, stlxrb, Register32);
1847 break;
1848 case kWord32AtomicExchangeInt16:
1849 ASSEMBLE_ATOMIC_EXCHANGE_INTEGER(ldaxrh, stlxrh, Register32);
1850 __ Sxth(i.OutputRegister(0), i.OutputRegister(0));
1851 break;
1852 case kWord32AtomicExchangeUint16:
1853 case kArm64Word64AtomicExchangeUint16:
1854 ASSEMBLE_ATOMIC_EXCHANGE_INTEGER(ldaxrh, stlxrh, Register32);
1855 break;
1856 case kWord32AtomicExchangeWord32:
1857 case kArm64Word64AtomicExchangeUint32:
1858 ASSEMBLE_ATOMIC_EXCHANGE_INTEGER(ldaxr, stlxr, Register32);
1859 break;
1860 case kArm64Word64AtomicExchangeUint64:
1861 ASSEMBLE_ATOMIC_EXCHANGE_INTEGER(ldaxr, stlxr, Register);
1862 break;
1863 case kWord32AtomicCompareExchangeInt8:
1864 ASSEMBLE_ATOMIC_COMPARE_EXCHANGE_INTEGER(ldaxrb, stlxrb, UXTB,
1865 Register32);
1866 __ Sxtb(i.OutputRegister(0), i.OutputRegister(0));
1867 break;
1868 case kWord32AtomicCompareExchangeUint8:
1869 case kArm64Word64AtomicCompareExchangeUint8:
1870 ASSEMBLE_ATOMIC_COMPARE_EXCHANGE_INTEGER(ldaxrb, stlxrb, UXTB,
1871 Register32);
1872 break;
1873 case kWord32AtomicCompareExchangeInt16:
1874 ASSEMBLE_ATOMIC_COMPARE_EXCHANGE_INTEGER(ldaxrh, stlxrh, UXTH,
1875 Register32);
1876 __ Sxth(i.OutputRegister(0), i.OutputRegister(0));
1877 break;
1878 case kWord32AtomicCompareExchangeUint16:
1879 case kArm64Word64AtomicCompareExchangeUint16:
1880 ASSEMBLE_ATOMIC_COMPARE_EXCHANGE_INTEGER(ldaxrh, stlxrh, UXTH,
1881 Register32);
1882 break;
1883 case kWord32AtomicCompareExchangeWord32:
1884 case kArm64Word64AtomicCompareExchangeUint32:
1885 ASSEMBLE_ATOMIC_COMPARE_EXCHANGE_INTEGER(ldaxr, stlxr, UXTW, Register32);
1886 break;
1887 case kArm64Word64AtomicCompareExchangeUint64:
1888 ASSEMBLE_ATOMIC_COMPARE_EXCHANGE_INTEGER(ldaxr, stlxr, UXTX, Register);
1889 break;
1890 #define ATOMIC_BINOP_CASE(op, inst) \
1891 case kWord32Atomic##op##Int8: \
1892 ASSEMBLE_ATOMIC_BINOP(ldaxrb, stlxrb, inst, Register32); \
1893 __ Sxtb(i.OutputRegister(0), i.OutputRegister(0)); \
1894 break; \
1895 case kWord32Atomic##op##Uint8: \
1896 case kArm64Word64Atomic##op##Uint8: \
1897 ASSEMBLE_ATOMIC_BINOP(ldaxrb, stlxrb, inst, Register32); \
1898 break; \
1899 case kWord32Atomic##op##Int16: \
1900 ASSEMBLE_ATOMIC_BINOP(ldaxrh, stlxrh, inst, Register32); \
1901 __ Sxth(i.OutputRegister(0), i.OutputRegister(0)); \
1902 break; \
1903 case kWord32Atomic##op##Uint16: \
1904 case kArm64Word64Atomic##op##Uint16: \
1905 ASSEMBLE_ATOMIC_BINOP(ldaxrh, stlxrh, inst, Register32); \
1906 break; \
1907 case kWord32Atomic##op##Word32: \
1908 case kArm64Word64Atomic##op##Uint32: \
1909 ASSEMBLE_ATOMIC_BINOP(ldaxr, stlxr, inst, Register32); \
1910 break; \
1911 case kArm64Word64Atomic##op##Uint64: \
1912 ASSEMBLE_ATOMIC_BINOP(ldaxr, stlxr, inst, Register); \
1913 break;
1914 ATOMIC_BINOP_CASE(Add, Add)
1915 ATOMIC_BINOP_CASE(Sub, Sub)
1916 ATOMIC_BINOP_CASE(And, And)
1917 ATOMIC_BINOP_CASE(Or, Orr)
1918 ATOMIC_BINOP_CASE(Xor, Eor)
1919 #undef ATOMIC_BINOP_CASE
1920 #undef ASSEMBLE_SHIFT
1921 #undef ASSEMBLE_ATOMIC_LOAD_INTEGER
1922 #undef ASSEMBLE_ATOMIC_STORE_INTEGER
1923 #undef ASSEMBLE_ATOMIC_EXCHANGE_INTEGER
1924 #undef ASSEMBLE_ATOMIC_COMPARE_EXCHANGE_INTEGER
1925 #undef ASSEMBLE_ATOMIC_BINOP
1926 #undef ASSEMBLE_IEEE754_BINOP
1927 #undef ASSEMBLE_IEEE754_UNOP
1928
1929 #define SIMD_UNOP_CASE(Op, Instr, FORMAT) \
1930 case Op: \
1931 __ Instr(i.OutputSimd128Register().V##FORMAT(), \
1932 i.InputSimd128Register(0).V##FORMAT()); \
1933 break;
1934 #define SIMD_BINOP_CASE(Op, Instr, FORMAT) \
1935 case Op: \
1936 __ Instr(i.OutputSimd128Register().V##FORMAT(), \
1937 i.InputSimd128Register(0).V##FORMAT(), \
1938 i.InputSimd128Register(1).V##FORMAT()); \
1939 break;
1940 #define SIMD_DESTRUCTIVE_BINOP_CASE(Op, Instr, FORMAT) \
1941 case Op: { \
1942 VRegister dst = i.OutputSimd128Register().V##FORMAT(); \
1943 DCHECK_EQ(dst, i.InputSimd128Register(0).V##FORMAT()); \
1944 __ Instr(dst, i.InputSimd128Register(1).V##FORMAT(), \
1945 i.InputSimd128Register(2).V##FORMAT()); \
1946 break; \
1947 }
1948
1949 case kArm64Sxtl: {
1950 VectorFormat wide = VectorFormatFillQ(MiscField::decode(opcode));
1951 VectorFormat narrow = VectorFormatHalfWidth(wide);
1952 __ Sxtl(i.OutputSimd128Register().Format(wide),
1953 i.InputSimd128Register(0).Format(narrow));
1954 break;
1955 }
1956 case kArm64Sxtl2: {
1957 VectorFormat wide = VectorFormatFillQ(MiscField::decode(opcode));
1958 VectorFormat narrow = VectorFormatHalfWidthDoubleLanes(wide);
1959 __ Sxtl2(i.OutputSimd128Register().Format(wide),
1960 i.InputSimd128Register(0).Format(narrow));
1961 break;
1962 }
1963 case kArm64Uxtl: {
1964 VectorFormat wide = VectorFormatFillQ(MiscField::decode(opcode));
1965 VectorFormat narrow = VectorFormatHalfWidth(wide);
1966 __ Uxtl(i.OutputSimd128Register().Format(wide),
1967 i.InputSimd128Register(0).Format(narrow));
1968 break;
1969 }
1970 case kArm64Uxtl2: {
1971 VectorFormat wide = VectorFormatFillQ(MiscField::decode(opcode));
1972 VectorFormat narrow = VectorFormatHalfWidthDoubleLanes(wide);
1973 __ Uxtl2(i.OutputSimd128Register().Format(wide),
1974 i.InputSimd128Register(0).Format(narrow));
1975 break;
1976 }
1977 case kArm64F64x2Splat: {
1978 __ Dup(i.OutputSimd128Register().V2D(), i.InputSimd128Register(0).D(), 0);
1979 break;
1980 }
1981 case kArm64F64x2ExtractLane: {
1982 __ Mov(i.OutputSimd128Register().D(), i.InputSimd128Register(0).V2D(),
1983 i.InputInt8(1));
1984 break;
1985 }
1986 case kArm64F64x2ReplaceLane: {
1987 VRegister dst = i.OutputSimd128Register().V2D(),
1988 src1 = i.InputSimd128Register(0).V2D();
1989 if (dst != src1) {
1990 __ Mov(dst, src1);
1991 }
1992 __ Mov(dst, i.InputInt8(1), i.InputSimd128Register(2).V2D(), 0);
1993 break;
1994 }
1995 SIMD_UNOP_CASE(kArm64F64x2Abs, Fabs, 2D);
1996 SIMD_UNOP_CASE(kArm64F64x2Neg, Fneg, 2D);
1997 SIMD_UNOP_CASE(kArm64F64x2Sqrt, Fsqrt, 2D);
1998 SIMD_BINOP_CASE(kArm64F64x2Add, Fadd, 2D);
1999 SIMD_BINOP_CASE(kArm64F64x2Sub, Fsub, 2D);
2000 SIMD_BINOP_CASE(kArm64F64x2Mul, Fmul, 2D);
2001 SIMD_BINOP_CASE(kArm64F64x2Div, Fdiv, 2D);
2002 SIMD_BINOP_CASE(kArm64F64x2Min, Fmin, 2D);
2003 SIMD_BINOP_CASE(kArm64F64x2Max, Fmax, 2D);
2004 SIMD_BINOP_CASE(kArm64F64x2Eq, Fcmeq, 2D);
2005 case kArm64F64x2Ne: {
2006 VRegister dst = i.OutputSimd128Register().V2D();
2007 __ Fcmeq(dst, i.InputSimd128Register(0).V2D(),
2008 i.InputSimd128Register(1).V2D());
2009 __ Mvn(dst, dst);
2010 break;
2011 }
2012 case kArm64F64x2Lt: {
2013 __ Fcmgt(i.OutputSimd128Register().V2D(), i.InputSimd128Register(1).V2D(),
2014 i.InputSimd128Register(0).V2D());
2015 break;
2016 }
2017 case kArm64F64x2Le: {
2018 __ Fcmge(i.OutputSimd128Register().V2D(), i.InputSimd128Register(1).V2D(),
2019 i.InputSimd128Register(0).V2D());
2020 break;
2021 }
2022 SIMD_DESTRUCTIVE_BINOP_CASE(kArm64F64x2Qfma, Fmla, 2D);
2023 SIMD_DESTRUCTIVE_BINOP_CASE(kArm64F64x2Qfms, Fmls, 2D);
2024 case kArm64F64x2Pmin: {
2025 VRegister dst = i.OutputSimd128Register().V2D();
2026 VRegister lhs = i.InputSimd128Register(0).V2D();
2027 VRegister rhs = i.InputSimd128Register(1).V2D();
2028 // f64x2.pmin(lhs, rhs)
2029 // = v128.bitselect(rhs, lhs, f64x2.lt(rhs,lhs))
2030 // = v128.bitselect(rhs, lhs, f64x2.gt(lhs,rhs))
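      // Bsl(dst, rhs, lhs) keeps rhs bits where dst is 1 and lhs bits where
      // dst is 0, so the all-ones lanes written by Fcmgt select the smaller
      // operand.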
2031 __ Fcmgt(dst, lhs, rhs);
2032 __ Bsl(dst.V16B(), rhs.V16B(), lhs.V16B());
2033 break;
2034 }
2035 case kArm64F64x2Pmax: {
2036 VRegister dst = i.OutputSimd128Register().V2D();
2037 VRegister lhs = i.InputSimd128Register(0).V2D();
2038 VRegister rhs = i.InputSimd128Register(1).V2D();
2039 // f64x2.pmax(lhs, rhs)
2040 // = v128.bitselect(rhs, lhs, f64x2.gt(rhs, lhs))
2041 __ Fcmgt(dst, rhs, lhs);
2042 __ Bsl(dst.V16B(), rhs.V16B(), lhs.V16B());
2043 break;
2044 }
2045 case kArm64F32x4Splat: {
2046 __ Dup(i.OutputSimd128Register().V4S(), i.InputSimd128Register(0).S(), 0);
2047 break;
2048 }
2049 case kArm64F32x4ExtractLane: {
2050 __ Mov(i.OutputSimd128Register().S(), i.InputSimd128Register(0).V4S(),
2051 i.InputInt8(1));
2052 break;
2053 }
2054 case kArm64F32x4ReplaceLane: {
2055 VRegister dst = i.OutputSimd128Register().V4S(),
2056 src1 = i.InputSimd128Register(0).V4S();
2057 if (dst != src1) {
2058 __ Mov(dst, src1);
2059 }
2060 __ Mov(dst, i.InputInt8(1), i.InputSimd128Register(2).V4S(), 0);
2061 break;
2062 }
2063 SIMD_UNOP_CASE(kArm64F32x4SConvertI32x4, Scvtf, 4S);
2064 SIMD_UNOP_CASE(kArm64F32x4UConvertI32x4, Ucvtf, 4S);
2065 SIMD_UNOP_CASE(kArm64F32x4Abs, Fabs, 4S);
2066 SIMD_UNOP_CASE(kArm64F32x4Neg, Fneg, 4S);
2067 SIMD_UNOP_CASE(kArm64F32x4Sqrt, Fsqrt, 4S);
2068 SIMD_UNOP_CASE(kArm64F32x4RecipApprox, Frecpe, 4S);
2069 SIMD_UNOP_CASE(kArm64F32x4RecipSqrtApprox, Frsqrte, 4S);
2070 SIMD_BINOP_CASE(kArm64F32x4Add, Fadd, 4S);
2071 SIMD_BINOP_CASE(kArm64F32x4AddHoriz, Faddp, 4S);
2072 SIMD_BINOP_CASE(kArm64F32x4Sub, Fsub, 4S);
2073 SIMD_BINOP_CASE(kArm64F32x4Mul, Fmul, 4S);
2074 SIMD_BINOP_CASE(kArm64F32x4Div, Fdiv, 4S);
2075 SIMD_BINOP_CASE(kArm64F32x4Min, Fmin, 4S);
2076 SIMD_BINOP_CASE(kArm64F32x4Max, Fmax, 4S);
2077 SIMD_BINOP_CASE(kArm64F32x4Eq, Fcmeq, 4S);
2078 case kArm64F32x4Ne: {
2079 VRegister dst = i.OutputSimd128Register().V4S();
2080 __ Fcmeq(dst, i.InputSimd128Register(0).V4S(),
2081 i.InputSimd128Register(1).V4S());
2082 __ Mvn(dst, dst);
2083 break;
2084 }
2085 case kArm64F32x4Lt: {
2086 __ Fcmgt(i.OutputSimd128Register().V4S(), i.InputSimd128Register(1).V4S(),
2087 i.InputSimd128Register(0).V4S());
2088 break;
2089 }
2090 case kArm64F32x4Le: {
2091 __ Fcmge(i.OutputSimd128Register().V4S(), i.InputSimd128Register(1).V4S(),
2092 i.InputSimd128Register(0).V4S());
2093 break;
2094 }
2095 SIMD_DESTRUCTIVE_BINOP_CASE(kArm64F32x4Qfma, Fmla, 4S);
2096 SIMD_DESTRUCTIVE_BINOP_CASE(kArm64F32x4Qfms, Fmls, 4S);
2097 case kArm64F32x4Pmin: {
2098 VRegister dst = i.OutputSimd128Register().V4S();
2099 VRegister lhs = i.InputSimd128Register(0).V4S();
2100 VRegister rhs = i.InputSimd128Register(1).V4S();
2101 // f32x4.pmin(lhs, rhs)
2102 // = v128.bitselect(rhs, lhs, f32x4.lt(rhs, lhs))
2103 // = v128.bitselect(rhs, lhs, f32x4.gt(lhs, rhs))
2104 __ Fcmgt(dst, lhs, rhs);
2105 __ Bsl(dst.V16B(), rhs.V16B(), lhs.V16B());
2106 break;
2107 }
2108 case kArm64F32x4Pmax: {
2109 VRegister dst = i.OutputSimd128Register().V4S();
2110 VRegister lhs = i.InputSimd128Register(0).V4S();
2111 VRegister rhs = i.InputSimd128Register(1).V4S();
2112 // f32x4.pmax(lhs, rhs)
2113 // = v128.bitselect(rhs, lhs, f32x4.gt(rhs, lhs))
2114 __ Fcmgt(dst, rhs, lhs);
2115 __ Bsl(dst.V16B(), rhs.V16B(), lhs.V16B());
2116 break;
2117 }
2118 case kArm64I64x2Splat: {
2119 __ Dup(i.OutputSimd128Register().V2D(), i.InputRegister64(0));
2120 break;
2121 }
2122 case kArm64I64x2ExtractLane: {
2123 __ Mov(i.OutputRegister64(), i.InputSimd128Register(0).V2D(),
2124 i.InputInt8(1));
2125 break;
2126 }
2127 case kArm64I64x2ReplaceLane: {
2128 VRegister dst = i.OutputSimd128Register().V2D(),
2129 src1 = i.InputSimd128Register(0).V2D();
2130 if (dst != src1) {
2131 __ Mov(dst, src1);
2132 }
2133 __ Mov(dst, i.InputInt8(1), i.InputRegister64(2));
2134 break;
2135 }
2136 SIMD_UNOP_CASE(kArm64I64x2Neg, Neg, 2D);
2137 case kArm64I64x2Shl: {
2138 ASSEMBLE_SIMD_SHIFT_LEFT(Shl, 6, V2D, Sshl, X);
2139 break;
2140 }
2141 case kArm64I64x2ShrS: {
2142 ASSEMBLE_SIMD_SHIFT_RIGHT(Sshr, 6, V2D, Sshl, X);
2143 break;
2144 }
2145 SIMD_BINOP_CASE(kArm64I64x2Add, Add, 2D);
2146 SIMD_BINOP_CASE(kArm64I64x2Sub, Sub, 2D);
2147 case kArm64I64x2Mul: {
2148 UseScratchRegisterScope scope(tasm());
2149 VRegister dst = i.OutputSimd128Register();
2150 VRegister src1 = i.InputSimd128Register(0);
2151 VRegister src2 = i.InputSimd128Register(1);
2152 VRegister tmp1 = scope.AcquireSameSizeAs(dst);
2153 VRegister tmp2 = scope.AcquireSameSizeAs(dst);
2154 VRegister tmp3 = i.ToSimd128Register(instr->TempAt(0));
2155
2156 // This 2x64-bit multiplication is performed with several 32-bit
2157 // multiplications.
2158
2159     // 64-bit numbers x and y can be represented as:
2160 // x = a + 2^32(b)
2161 // y = c + 2^32(d)
2162
2163 // A 64-bit multiplication is:
2164 // x * y = ac + 2^32(ad + bc) + 2^64(bd)
2165     // note: `2^64(bd)` can be ignored, as the value is too large to fit
2166     // in 64 bits.
2167
2168     // This sequence implements a 2x64-bit multiply, where the registers
2169 // `src1` and `src2` are split up into 32-bit components:
2170 // src1 = |d|c|b|a|
2171 // src2 = |h|g|f|e|
2172 //
2173 // src1 * src2 = |cg + 2^32(ch + dg)|ae + 2^32(af + be)|
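    // For example, with lane 0 of src1 holding a = 2, b = 1 (x = 2 + 2^32)
    // and lane 0 of src2 holding e = 4, f = 3 (y = 4 + 3 * 2^32), the lane
    // result is ae + 2^32(af + be) = 8 + 10 * 2^32, i.e. x * y mod 2^64.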
2174
2175 // Reverse the 32-bit elements in the 64-bit words.
2176 // tmp2 = |g|h|e|f|
2177 __ Rev64(tmp2.V4S(), src2.V4S());
2178
2179 // Calculate the high half components.
2180 // tmp2 = |dg|ch|be|af|
2181 __ Mul(tmp2.V4S(), tmp2.V4S(), src1.V4S());
2182
2183 // Extract the low half components of src1.
2184 // tmp1 = |c|a|
2185 __ Xtn(tmp1.V2S(), src1.V2D());
2186
2187 // Sum the respective high half components.
2188 // tmp2 = |dg+ch|be+af||dg+ch|be+af|
2189 __ Addp(tmp2.V4S(), tmp2.V4S(), tmp2.V4S());
2190
2191 // Extract the low half components of src2.
2192 // tmp3 = |g|e|
2193 __ Xtn(tmp3.V2S(), src2.V2D());
2194
2195 // Shift the high half components, into the high half.
2196 // dst = |dg+ch << 32|be+af << 32|
2197 __ Shll(dst.V2D(), tmp2.V2S(), 32);
2198
2199 // Multiply the low components together, and accumulate with the high
2200 // half.
2201 // dst = |dst[1] + cg|dst[0] + ae|
2202 __ Umlal(dst.V2D(), tmp3.V2S(), tmp1.V2S());
2203
2204 break;
2205 }
2206 SIMD_BINOP_CASE(kArm64I64x2Eq, Cmeq, 2D);
2207 case kArm64I64x2ShrU: {
2208 ASSEMBLE_SIMD_SHIFT_RIGHT(Ushr, 6, V2D, Ushl, X);
2209 break;
2210 }
2211 case kArm64I32x4Splat: {
2212 __ Dup(i.OutputSimd128Register().V4S(), i.InputRegister32(0));
2213 break;
2214 }
2215 case kArm64I32x4ExtractLane: {
2216 __ Mov(i.OutputRegister32(), i.InputSimd128Register(0).V4S(),
2217 i.InputInt8(1));
2218 break;
2219 }
2220 case kArm64I32x4ReplaceLane: {
2221 VRegister dst = i.OutputSimd128Register().V4S(),
2222 src1 = i.InputSimd128Register(0).V4S();
2223 if (dst != src1) {
2224 __ Mov(dst, src1);
2225 }
2226 __ Mov(dst, i.InputInt8(1), i.InputRegister32(2));
2227 break;
2228 }
2229 SIMD_UNOP_CASE(kArm64I32x4SConvertF32x4, Fcvtzs, 4S);
2230 SIMD_UNOP_CASE(kArm64I32x4Neg, Neg, 4S);
2231 case kArm64I32x4Shl: {
2232 ASSEMBLE_SIMD_SHIFT_LEFT(Shl, 5, V4S, Sshl, W);
2233 break;
2234 }
2235 case kArm64I32x4ShrS: {
2236 ASSEMBLE_SIMD_SHIFT_RIGHT(Sshr, 5, V4S, Sshl, W);
2237 break;
2238 }
2239 SIMD_BINOP_CASE(kArm64I32x4Add, Add, 4S);
2240 SIMD_BINOP_CASE(kArm64I32x4AddHoriz, Addp, 4S);
2241 SIMD_BINOP_CASE(kArm64I32x4Sub, Sub, 4S);
2242 SIMD_BINOP_CASE(kArm64I32x4Mul, Mul, 4S);
2243 SIMD_DESTRUCTIVE_BINOP_CASE(kArm64I32x4Mla, Mla, 4S);
2244 SIMD_DESTRUCTIVE_BINOP_CASE(kArm64I32x4Mls, Mls, 4S);
2245 SIMD_BINOP_CASE(kArm64I32x4MinS, Smin, 4S);
2246 SIMD_BINOP_CASE(kArm64I32x4MaxS, Smax, 4S);
2247 SIMD_BINOP_CASE(kArm64I32x4Eq, Cmeq, 4S);
2248 case kArm64I32x4Ne: {
2249 VRegister dst = i.OutputSimd128Register().V4S();
2250 __ Cmeq(dst, i.InputSimd128Register(0).V4S(),
2251 i.InputSimd128Register(1).V4S());
2252 __ Mvn(dst, dst);
2253 break;
2254 }
2255 SIMD_BINOP_CASE(kArm64I32x4GtS, Cmgt, 4S);
2256 SIMD_BINOP_CASE(kArm64I32x4GeS, Cmge, 4S);
2257 SIMD_UNOP_CASE(kArm64I32x4UConvertF32x4, Fcvtzu, 4S);
2258 case kArm64I32x4ShrU: {
2259 ASSEMBLE_SIMD_SHIFT_RIGHT(Ushr, 5, V4S, Ushl, W);
2260 break;
2261 }
2262 SIMD_BINOP_CASE(kArm64I32x4MinU, Umin, 4S);
2263 SIMD_BINOP_CASE(kArm64I32x4MaxU, Umax, 4S);
2264 SIMD_BINOP_CASE(kArm64I32x4GtU, Cmhi, 4S);
2265 SIMD_BINOP_CASE(kArm64I32x4GeU, Cmhs, 4S);
2266 SIMD_UNOP_CASE(kArm64I32x4Abs, Abs, 4S);
2267 case kArm64I32x4BitMask: {
2268 Register dst = i.OutputRegister32();
2269 VRegister src = i.InputSimd128Register(0);
2270 VRegister tmp = i.TempSimd128Register(0);
2271 VRegister mask = i.TempSimd128Register(1);
2272
2273 __ Sshr(tmp.V4S(), src.V4S(), 31);
2274     // Set bit i in lane i of the mask. ANDing it with tmp keeps that
2275     // bit for lanes whose sign bit was set and zeroes the other lanes.
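      // Sketch: mask.V4S() lanes hold {1, 2, 4, 8}, so after And and Addv
      // the S lane carries a 4-bit lane mask; e.g. inputs {-1, 0, -1, 0}
      // yield 1 + 4 = 0b0101.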
2276 __ Movi(mask.V2D(), 0x0000'0008'0000'0004, 0x0000'0002'0000'0001);
2277 __ And(tmp.V16B(), mask.V16B(), tmp.V16B());
2278 __ Addv(tmp.S(), tmp.V4S());
2279 __ Mov(dst.W(), tmp.V4S(), 0);
2280 break;
2281 }
2282 case kArm64I32x4DotI16x8S: {
2283 UseScratchRegisterScope scope(tasm());
2284 VRegister lhs = i.InputSimd128Register(0);
2285 VRegister rhs = i.InputSimd128Register(1);
2286 VRegister tmp1 = scope.AcquireV(kFormat4S);
2287 VRegister tmp2 = scope.AcquireV(kFormat4S);
2288 __ Smull(tmp1, lhs.V4H(), rhs.V4H());
2289 __ Smull2(tmp2, lhs.V8H(), rhs.V8H());
2290 __ Addp(i.OutputSimd128Register().V4S(), tmp1, tmp2);
2291 break;
2292 }
2293 case kArm64I16x8Splat: {
2294 __ Dup(i.OutputSimd128Register().V8H(), i.InputRegister32(0));
2295 break;
2296 }
2297 case kArm64I16x8ExtractLaneU: {
2298 __ Umov(i.OutputRegister32(), i.InputSimd128Register(0).V8H(),
2299 i.InputInt8(1));
2300 break;
2301 }
2302 case kArm64I16x8ExtractLaneS: {
2303 __ Smov(i.OutputRegister32(), i.InputSimd128Register(0).V8H(),
2304 i.InputInt8(1));
2305 break;
2306 }
2307 case kArm64I16x8ReplaceLane: {
2308 VRegister dst = i.OutputSimd128Register().V8H(),
2309 src1 = i.InputSimd128Register(0).V8H();
2310 if (dst != src1) {
2311 __ Mov(dst, src1);
2312 }
2313 __ Mov(dst, i.InputInt8(1), i.InputRegister32(2));
2314 break;
2315 }
2316 SIMD_UNOP_CASE(kArm64I16x8Neg, Neg, 8H);
2317 case kArm64I16x8Shl: {
2318 ASSEMBLE_SIMD_SHIFT_LEFT(Shl, 4, V8H, Sshl, W);
2319 break;
2320 }
2321 case kArm64I16x8ShrS: {
2322 ASSEMBLE_SIMD_SHIFT_RIGHT(Sshr, 4, V8H, Sshl, W);
2323 break;
2324 }
2325 case kArm64I16x8SConvertI32x4: {
2326 VRegister dst = i.OutputSimd128Register(),
2327 src0 = i.InputSimd128Register(0),
2328 src1 = i.InputSimd128Register(1);
2329 UseScratchRegisterScope scope(tasm());
2330 VRegister temp = scope.AcquireV(kFormat4S);
2331 if (dst == src1) {
2332 __ Mov(temp, src1.V4S());
2333 src1 = temp;
2334 }
2335 __ Sqxtn(dst.V4H(), src0.V4S());
2336 __ Sqxtn2(dst.V8H(), src1.V4S());
2337 break;
2338 }
2339 SIMD_BINOP_CASE(kArm64I16x8Add, Add, 8H);
2340 SIMD_BINOP_CASE(kArm64I16x8AddSatS, Sqadd, 8H);
2341 SIMD_BINOP_CASE(kArm64I16x8AddHoriz, Addp, 8H);
2342 SIMD_BINOP_CASE(kArm64I16x8Sub, Sub, 8H);
2343 SIMD_BINOP_CASE(kArm64I16x8SubSatS, Sqsub, 8H);
2344 SIMD_BINOP_CASE(kArm64I16x8Mul, Mul, 8H);
2345 SIMD_DESTRUCTIVE_BINOP_CASE(kArm64I16x8Mla, Mla, 8H);
2346 SIMD_DESTRUCTIVE_BINOP_CASE(kArm64I16x8Mls, Mls, 8H);
2347 SIMD_BINOP_CASE(kArm64I16x8MinS, Smin, 8H);
2348 SIMD_BINOP_CASE(kArm64I16x8MaxS, Smax, 8H);
2349 SIMD_BINOP_CASE(kArm64I16x8Eq, Cmeq, 8H);
2350 case kArm64I16x8Ne: {
2351 VRegister dst = i.OutputSimd128Register().V8H();
2352 __ Cmeq(dst, i.InputSimd128Register(0).V8H(),
2353 i.InputSimd128Register(1).V8H());
2354 __ Mvn(dst, dst);
2355 break;
2356 }
2357 SIMD_BINOP_CASE(kArm64I16x8GtS, Cmgt, 8H);
2358 SIMD_BINOP_CASE(kArm64I16x8GeS, Cmge, 8H);
2359 case kArm64I16x8ShrU: {
2360 ASSEMBLE_SIMD_SHIFT_RIGHT(Ushr, 4, V8H, Ushl, W);
2361 break;
2362 }
2363 case kArm64I16x8UConvertI32x4: {
2364 VRegister dst = i.OutputSimd128Register(),
2365 src0 = i.InputSimd128Register(0),
2366 src1 = i.InputSimd128Register(1);
2367 UseScratchRegisterScope scope(tasm());
2368 VRegister temp = scope.AcquireV(kFormat4S);
2369 if (dst == src1) {
2370 __ Mov(temp, src1.V4S());
2371 src1 = temp;
2372 }
2373 __ Sqxtun(dst.V4H(), src0.V4S());
2374 __ Sqxtun2(dst.V8H(), src1.V4S());
2375 break;
2376 }
2377 SIMD_BINOP_CASE(kArm64I16x8AddSatU, Uqadd, 8H);
2378 SIMD_BINOP_CASE(kArm64I16x8SubSatU, Uqsub, 8H);
2379 SIMD_BINOP_CASE(kArm64I16x8MinU, Umin, 8H);
2380 SIMD_BINOP_CASE(kArm64I16x8MaxU, Umax, 8H);
2381 SIMD_BINOP_CASE(kArm64I16x8GtU, Cmhi, 8H);
2382 SIMD_BINOP_CASE(kArm64I16x8GeU, Cmhs, 8H);
2383 SIMD_BINOP_CASE(kArm64I16x8RoundingAverageU, Urhadd, 8H);
2384 SIMD_BINOP_CASE(kArm64I16x8Q15MulRSatS, Sqrdmulh, 8H);
2385 SIMD_UNOP_CASE(kArm64I16x8Abs, Abs, 8H);
2386 case kArm64I16x8BitMask: {
2387 Register dst = i.OutputRegister32();
2388 VRegister src = i.InputSimd128Register(0);
2389 VRegister tmp = i.TempSimd128Register(0);
2390 VRegister mask = i.TempSimd128Register(1);
2391
2392 __ Sshr(tmp.V8H(), src.V8H(), 15);
2393     // Set bit i in lane i of the mask. ANDing it with tmp keeps that
2394     // bit for lanes whose sign bit was set and zeroes the other lanes.
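      // Here mask.V8H() lanes hold {1, 2, 4, 8, 16, 32, 64, 128}, so the
      // horizontal add yields an 8-bit lane mask.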
2395 __ Movi(mask.V2D(), 0x0080'0040'0020'0010, 0x0008'0004'0002'0001);
2396 __ And(tmp.V16B(), mask.V16B(), tmp.V16B());
2397 __ Addv(tmp.H(), tmp.V8H());
2398 __ Mov(dst.W(), tmp.V8H(), 0);
2399 break;
2400 }
2401 case kArm64I8x16Splat: {
2402 __ Dup(i.OutputSimd128Register().V16B(), i.InputRegister32(0));
2403 break;
2404 }
2405 case kArm64I8x16ExtractLaneU: {
2406 __ Umov(i.OutputRegister32(), i.InputSimd128Register(0).V16B(),
2407 i.InputInt8(1));
2408 break;
2409 }
2410 case kArm64I8x16ExtractLaneS: {
2411 __ Smov(i.OutputRegister32(), i.InputSimd128Register(0).V16B(),
2412 i.InputInt8(1));
2413 break;
2414 }
2415 case kArm64I8x16ReplaceLane: {
2416 VRegister dst = i.OutputSimd128Register().V16B(),
2417 src1 = i.InputSimd128Register(0).V16B();
2418 if (dst != src1) {
2419 __ Mov(dst, src1);
2420 }
2421 __ Mov(dst, i.InputInt8(1), i.InputRegister32(2));
2422 break;
2423 }
2424 SIMD_UNOP_CASE(kArm64I8x16Neg, Neg, 16B);
2425 case kArm64I8x16Shl: {
2426 ASSEMBLE_SIMD_SHIFT_LEFT(Shl, 3, V16B, Sshl, W);
2427 break;
2428 }
2429 case kArm64I8x16ShrS: {
2430 ASSEMBLE_SIMD_SHIFT_RIGHT(Sshr, 3, V16B, Sshl, W);
2431 break;
2432 }
2433 case kArm64I8x16SConvertI16x8: {
2434 VRegister dst = i.OutputSimd128Register(),
2435 src0 = i.InputSimd128Register(0),
2436 src1 = i.InputSimd128Register(1);
2437 UseScratchRegisterScope scope(tasm());
2438 VRegister temp = scope.AcquireV(kFormat8H);
2439 if (dst == src1) {
2440 __ Mov(temp, src1.V8H());
2441 src1 = temp;
2442 }
2443 __ Sqxtn(dst.V8B(), src0.V8H());
2444 __ Sqxtn2(dst.V16B(), src1.V8H());
2445 break;
2446 }
2447 SIMD_BINOP_CASE(kArm64I8x16Add, Add, 16B);
2448 SIMD_BINOP_CASE(kArm64I8x16AddSatS, Sqadd, 16B);
2449 SIMD_BINOP_CASE(kArm64I8x16Sub, Sub, 16B);
2450 SIMD_BINOP_CASE(kArm64I8x16SubSatS, Sqsub, 16B);
2451 SIMD_BINOP_CASE(kArm64I8x16Mul, Mul, 16B);
2452 SIMD_DESTRUCTIVE_BINOP_CASE(kArm64I8x16Mla, Mla, 16B);
2453 SIMD_DESTRUCTIVE_BINOP_CASE(kArm64I8x16Mls, Mls, 16B);
2454 SIMD_BINOP_CASE(kArm64I8x16MinS, Smin, 16B);
2455 SIMD_BINOP_CASE(kArm64I8x16MaxS, Smax, 16B);
2456 SIMD_BINOP_CASE(kArm64I8x16Eq, Cmeq, 16B);
2457 case kArm64I8x16Ne: {
2458 VRegister dst = i.OutputSimd128Register().V16B();
2459 __ Cmeq(dst, i.InputSimd128Register(0).V16B(),
2460 i.InputSimd128Register(1).V16B());
2461 __ Mvn(dst, dst);
2462 break;
2463 }
2464 SIMD_BINOP_CASE(kArm64I8x16GtS, Cmgt, 16B);
2465 SIMD_BINOP_CASE(kArm64I8x16GeS, Cmge, 16B);
2466 case kArm64I8x16ShrU: {
2467 ASSEMBLE_SIMD_SHIFT_RIGHT(Ushr, 3, V16B, Ushl, W);
2468 break;
2469 }
2470 case kArm64I8x16UConvertI16x8: {
2471 VRegister dst = i.OutputSimd128Register(),
2472 src0 = i.InputSimd128Register(0),
2473 src1 = i.InputSimd128Register(1);
2474 UseScratchRegisterScope scope(tasm());
2475 VRegister temp = scope.AcquireV(kFormat8H);
2476 if (dst == src1) {
2477 __ Mov(temp, src1.V8H());
2478 src1 = temp;
2479 }
2480 __ Sqxtun(dst.V8B(), src0.V8H());
2481 __ Sqxtun2(dst.V16B(), src1.V8H());
2482 break;
2483 }
2484 SIMD_BINOP_CASE(kArm64I8x16AddSatU, Uqadd, 16B);
2485 SIMD_BINOP_CASE(kArm64I8x16SubSatU, Uqsub, 16B);
2486 SIMD_BINOP_CASE(kArm64I8x16MinU, Umin, 16B);
2487 SIMD_BINOP_CASE(kArm64I8x16MaxU, Umax, 16B);
2488 SIMD_BINOP_CASE(kArm64I8x16GtU, Cmhi, 16B);
2489 SIMD_BINOP_CASE(kArm64I8x16GeU, Cmhs, 16B);
2490 SIMD_BINOP_CASE(kArm64I8x16RoundingAverageU, Urhadd, 16B);
2491 SIMD_UNOP_CASE(kArm64I8x16Abs, Abs, 16B);
2492 case kArm64I8x16BitMask: {
2493 Register dst = i.OutputRegister32();
2494 VRegister src = i.InputSimd128Register(0);
2495 VRegister tmp = i.TempSimd128Register(0);
2496 VRegister mask = i.TempSimd128Register(1);
2497
2498     // Set bit i in lane i of the mask. ANDing it with tmp keeps that
2499     // bit for lanes whose sign bit was set and zeroes the other lanes.
2500 __ Sshr(tmp.V16B(), src.V16B(), 7);
2501 __ Movi(mask.V2D(), 0x8040'2010'0804'0201);
2502 __ And(tmp.V16B(), mask.V16B(), tmp.V16B());
2503 __ Ext(mask.V16B(), tmp.V16B(), tmp.V16B(), 8);
2504 __ Zip1(tmp.V16B(), tmp.V16B(), mask.V16B());
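      // The per-byte masks {1, ..., 128} repeat in both halves, so Ext
      // rotates the high 8 bytes down and Zip1 interleaves them with the low
      // 8 bytes; each 16-bit lane then holds low | (high << 8), and Addv
      // accumulates the full 16-bit mask.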
2505 __ Addv(tmp.H(), tmp.V8H());
2506 __ Mov(dst.W(), tmp.V8H(), 0);
2507 break;
2508 }
2509 case kArm64S128Const: {
2510 uint64_t imm1 = make_uint64(i.InputUint32(1), i.InputUint32(0));
2511 uint64_t imm2 = make_uint64(i.InputUint32(3), i.InputUint32(2));
2512 __ Movi(i.OutputSimd128Register().V16B(), imm2, imm1);
2513 break;
2514 }
2515 case kArm64S128Zero: {
2516 VRegister dst = i.OutputSimd128Register().V16B();
2517 __ Eor(dst, dst, dst);
2518 break;
2519 }
2520 SIMD_BINOP_CASE(kArm64S128And, And, 16B);
2521 SIMD_BINOP_CASE(kArm64S128Or, Orr, 16B);
2522 SIMD_BINOP_CASE(kArm64S128Xor, Eor, 16B);
2523 SIMD_UNOP_CASE(kArm64S128Not, Mvn, 16B);
2524 case kArm64S128Dup: {
2525 VRegister dst = i.OutputSimd128Register(),
2526 src = i.InputSimd128Register(0);
2527 int lanes = i.InputInt32(1);
2528 int index = i.InputInt32(2);
2529 switch (lanes) {
2530 case 4:
2531 __ Dup(dst.V4S(), src.V4S(), index);
2532 break;
2533 case 8:
2534 __ Dup(dst.V8H(), src.V8H(), index);
2535 break;
2536 case 16:
2537 __ Dup(dst.V16B(), src.V16B(), index);
2538 break;
2539 default:
2540 UNREACHABLE();
2541 break;
2542 }
2543 break;
2544 }
2545 SIMD_DESTRUCTIVE_BINOP_CASE(kArm64S128Select, Bsl, 16B);
2546 SIMD_BINOP_CASE(kArm64S128AndNot, Bic, 16B);
2547 case kArm64S32x4Shuffle: {
2548 Simd128Register dst = i.OutputSimd128Register().V4S(),
2549 src0 = i.InputSimd128Register(0).V4S(),
2550 src1 = i.InputSimd128Register(1).V4S();
2551 // Check for in-place shuffles.
2552 // If dst == src0 == src1, then the shuffle is unary and we only use src0.
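      // The 32-bit shuffle immediate packs four source-lane indices, one per
      // result lane, 8 bits each; values 0-3 select a lane of src0 and 4-7
      // select a lane of src1 (see the loop below).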
2553 UseScratchRegisterScope scope(tasm());
2554 VRegister temp = scope.AcquireV(kFormat4S);
2555 if (dst == src0) {
2556 __ Mov(temp, src0);
2557 src0 = temp;
2558 } else if (dst == src1) {
2559 __ Mov(temp, src1);
2560 src1 = temp;
2561 }
2562 // Perform shuffle as a vmov per lane.
2563 int32_t shuffle = i.InputInt32(2);
2564 for (int i = 0; i < 4; i++) {
2565 VRegister src = src0;
2566 int lane = shuffle & 0x7;
2567 if (lane >= 4) {
2568 src = src1;
2569 lane &= 0x3;
2570 }
2571 __ Mov(dst, i, src, lane);
2572 shuffle >>= 8;
2573 }
2574 break;
2575 }
2576 SIMD_BINOP_CASE(kArm64S32x4ZipLeft, Zip1, 4S);
2577 SIMD_BINOP_CASE(kArm64S32x4ZipRight, Zip2, 4S);
2578 SIMD_BINOP_CASE(kArm64S32x4UnzipLeft, Uzp1, 4S);
2579 SIMD_BINOP_CASE(kArm64S32x4UnzipRight, Uzp2, 4S);
2580 SIMD_BINOP_CASE(kArm64S32x4TransposeLeft, Trn1, 4S);
2581 SIMD_BINOP_CASE(kArm64S32x4TransposeRight, Trn2, 4S);
2582 SIMD_BINOP_CASE(kArm64S16x8ZipLeft, Zip1, 8H);
2583 SIMD_BINOP_CASE(kArm64S16x8ZipRight, Zip2, 8H);
2584 SIMD_BINOP_CASE(kArm64S16x8UnzipLeft, Uzp1, 8H);
2585 SIMD_BINOP_CASE(kArm64S16x8UnzipRight, Uzp2, 8H);
2586 SIMD_BINOP_CASE(kArm64S16x8TransposeLeft, Trn1, 8H);
2587 SIMD_BINOP_CASE(kArm64S16x8TransposeRight, Trn2, 8H);
2588 SIMD_BINOP_CASE(kArm64S8x16ZipLeft, Zip1, 16B);
2589 SIMD_BINOP_CASE(kArm64S8x16ZipRight, Zip2, 16B);
2590 SIMD_BINOP_CASE(kArm64S8x16UnzipLeft, Uzp1, 16B);
2591 SIMD_BINOP_CASE(kArm64S8x16UnzipRight, Uzp2, 16B);
2592 SIMD_BINOP_CASE(kArm64S8x16TransposeLeft, Trn1, 16B);
2593 SIMD_BINOP_CASE(kArm64S8x16TransposeRight, Trn2, 16B);
2594 case kArm64S8x16Concat: {
2595 __ Ext(i.OutputSimd128Register().V16B(), i.InputSimd128Register(0).V16B(),
2596 i.InputSimd128Register(1).V16B(), i.InputInt4(2));
2597 break;
2598 }
2599 case kArm64I8x16Swizzle: {
2600 __ Tbl(i.OutputSimd128Register().V16B(), i.InputSimd128Register(0).V16B(),
2601 i.InputSimd128Register(1).V16B());
2602 break;
2603 }
2604 case kArm64I8x16Shuffle: {
2605 Simd128Register dst = i.OutputSimd128Register().V16B(),
2606 src0 = i.InputSimd128Register(0).V16B(),
2607 src1 = i.InputSimd128Register(1).V16B();
2608     // The unary shuffle table lives in src0; the binary shuffle table
2609     // spans src0 and src1, which must be consecutive registers.
2610 if (src0 != src1) {
2611 DCHECK(AreConsecutive(src0, src1));
2612 }
2613
2614 int64_t imm1 = make_uint64(i.InputInt32(3), i.InputInt32(2));
2615 int64_t imm2 = make_uint64(i.InputInt32(5), i.InputInt32(4));
2616 DCHECK_EQ(0, (imm1 | imm2) & (src0 == src1 ? 0xF0F0F0F0F0F0F0F0
2617 : 0xE0E0E0E0E0E0E0E0));
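      // A unary Tbl reads a single 16-byte table, so each index byte must be
      // in 0-15 (upper nibble clear); a binary Tbl reads 32 consecutive
      // bytes, allowing indices 0-31. The DCHECK above verifies this.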
2618
2619 UseScratchRegisterScope scope(tasm());
2620 VRegister temp = scope.AcquireV(kFormat16B);
2621 __ Movi(temp, imm2, imm1);
2622
2623 if (src0 == src1) {
2624 __ Tbl(dst, src0, temp.V16B());
2625 } else {
2626 __ Tbl(dst, src0, src1, temp.V16B());
2627 }
2628 break;
2629 }
2630 SIMD_UNOP_CASE(kArm64S32x2Reverse, Rev64, 4S);
2631 SIMD_UNOP_CASE(kArm64S16x4Reverse, Rev64, 8H);
2632 SIMD_UNOP_CASE(kArm64S16x2Reverse, Rev32, 8H);
2633 SIMD_UNOP_CASE(kArm64S8x8Reverse, Rev64, 16B);
2634 SIMD_UNOP_CASE(kArm64S8x4Reverse, Rev32, 16B);
2635 SIMD_UNOP_CASE(kArm64S8x2Reverse, Rev16, 16B);
2636 case kArm64LoadSplat: {
2637 VectorFormat f = VectorFormatFillQ(MiscField::decode(opcode));
2638 __ ld1r(i.OutputSimd128Register().Format(f), i.MemoryOperand(0));
2639 break;
2640 }
2641 case kArm64S128Load8x8S: {
2642 __ Ldr(i.OutputSimd128Register().V8B(), i.MemoryOperand(0));
2643 __ Sxtl(i.OutputSimd128Register().V8H(), i.OutputSimd128Register().V8B());
2644 break;
2645 }
2646 case kArm64S128Load8x8U: {
2647 __ Ldr(i.OutputSimd128Register().V8B(), i.MemoryOperand(0));
2648 __ Uxtl(i.OutputSimd128Register().V8H(), i.OutputSimd128Register().V8B());
2649 break;
2650 }
2651 case kArm64S128Load16x4S: {
2652 __ Ldr(i.OutputSimd128Register().V4H(), i.MemoryOperand(0));
2653 __ Sxtl(i.OutputSimd128Register().V4S(), i.OutputSimd128Register().V4H());
2654 break;
2655 }
2656 case kArm64S128Load16x4U: {
2657 __ Ldr(i.OutputSimd128Register().V4H(), i.MemoryOperand(0));
2658 __ Uxtl(i.OutputSimd128Register().V4S(), i.OutputSimd128Register().V4H());
2659 break;
2660 }
2661 case kArm64S128Load32x2S: {
2662 __ Ldr(i.OutputSimd128Register().V2S(), i.MemoryOperand(0));
2663 __ Sxtl(i.OutputSimd128Register().V2D(), i.OutputSimd128Register().V2S());
2664 break;
2665 }
2666 case kArm64S128Load32x2U: {
2667 __ Ldr(i.OutputSimd128Register().V2S(), i.MemoryOperand(0));
2668 __ Uxtl(i.OutputSimd128Register().V2D(), i.OutputSimd128Register().V2S());
2669 break;
2670 }
2671 case kArm64S128Load32Zero: {
2672 __ Ldr(i.OutputSimd128Register().S(), i.MemoryOperand(0));
2673 break;
2674 }
2675 case kArm64S128Load64Zero: {
2676 __ Ldr(i.OutputSimd128Register().D(), i.MemoryOperand(0));
2677 break;
2678 }
2679 #define SIMD_REDUCE_OP_CASE(Op, Instr, format, FORMAT) \
2680 case Op: { \
2681 UseScratchRegisterScope scope(tasm()); \
2682 VRegister temp = scope.AcquireV(format); \
2683 __ Instr(temp, i.InputSimd128Register(0).V##FORMAT()); \
2684 __ Umov(i.OutputRegister32(), temp, 0); \
2685 __ Cmp(i.OutputRegister32(), 0); \
2686 __ Cset(i.OutputRegister32(), ne); \
2687 break; \
2688 }
2689   // For AnyTrue the format does not matter: any set bit makes Umaxv nonzero.
2690 SIMD_REDUCE_OP_CASE(kArm64V128AnyTrue, Umaxv, kFormatS, 4S);
2691 SIMD_REDUCE_OP_CASE(kArm64V32x4AllTrue, Uminv, kFormatS, 4S);
2692 SIMD_REDUCE_OP_CASE(kArm64V16x8AllTrue, Uminv, kFormatH, 8H);
2693 SIMD_REDUCE_OP_CASE(kArm64V8x16AllTrue, Uminv, kFormatB, 16B);
2694 }
2695 return kSuccess;
2696 } // NOLINT(readability/fn_size)
2697
2698 #undef SIMD_UNOP_CASE
2699 #undef SIMD_BINOP_CASE
2700 #undef SIMD_DESTRUCTIVE_BINOP_CASE
2701 #undef SIMD_REDUCE_OP_CASE
2702 #undef ASSEMBLE_SIMD_SHIFT_LEFT
2703 #undef ASSEMBLE_SIMD_SHIFT_RIGHT
2704
2705 // Assemble branches after this instruction.
2706 void CodeGenerator::AssembleArchBranch(Instruction* instr, BranchInfo* branch) {
2707 Arm64OperandConverter i(this, instr);
2708 Label* tlabel = branch->true_label;
2709 Label* flabel = branch->false_label;
2710 FlagsCondition condition = branch->condition;
2711 ArchOpcode opcode = instr->arch_opcode();
2712
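  // The fused compare-and-branch opcodes handled below encode the comparison
  // in the branch itself (Cbz/Cbnz, Tbz/Tbnz); all other conditions rely on
  // flags set by a previous instruction and emit a plain conditional branch.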
2713 if (opcode == kArm64CompareAndBranch32) {
2714 DCHECK(FlagsModeField::decode(instr->opcode()) != kFlags_branch_and_poison);
2715 switch (condition) {
2716 case kEqual:
2717 __ Cbz(i.InputRegister32(0), tlabel);
2718 break;
2719 case kNotEqual:
2720 __ Cbnz(i.InputRegister32(0), tlabel);
2721 break;
2722 default:
2723 UNREACHABLE();
2724 }
2725 } else if (opcode == kArm64CompareAndBranch) {
2726 DCHECK(FlagsModeField::decode(instr->opcode()) != kFlags_branch_and_poison);
2727 switch (condition) {
2728 case kEqual:
2729 __ Cbz(i.InputRegister64(0), tlabel);
2730 break;
2731 case kNotEqual:
2732 __ Cbnz(i.InputRegister64(0), tlabel);
2733 break;
2734 default:
2735 UNREACHABLE();
2736 }
2737 } else if (opcode == kArm64TestAndBranch32) {
2738 DCHECK(FlagsModeField::decode(instr->opcode()) != kFlags_branch_and_poison);
2739 switch (condition) {
2740 case kEqual:
2741 __ Tbz(i.InputRegister32(0), i.InputInt5(1), tlabel);
2742 break;
2743 case kNotEqual:
2744 __ Tbnz(i.InputRegister32(0), i.InputInt5(1), tlabel);
2745 break;
2746 default:
2747 UNREACHABLE();
2748 }
2749 } else if (opcode == kArm64TestAndBranch) {
2750 DCHECK(FlagsModeField::decode(instr->opcode()) != kFlags_branch_and_poison);
2751 switch (condition) {
2752 case kEqual:
2753 __ Tbz(i.InputRegister64(0), i.InputInt6(1), tlabel);
2754 break;
2755 case kNotEqual:
2756 __ Tbnz(i.InputRegister64(0), i.InputInt6(1), tlabel);
2757 break;
2758 default:
2759 UNREACHABLE();
2760 }
2761 } else {
2762 Condition cc = FlagsConditionToCondition(condition);
2763 __ B(cc, tlabel);
2764 }
2765 if (!branch->fallthru) __ B(flabel); // no fallthru to flabel.
2766 }
2767
2768 void CodeGenerator::AssembleBranchPoisoning(FlagsCondition condition,
2769 Instruction* instr) {
2770 // TODO(jarin) Handle float comparisons (kUnordered[Not]Equal).
2771 if (condition == kUnorderedEqual || condition == kUnorderedNotEqual) {
2772 return;
2773 }
2774
2775 condition = NegateFlagsCondition(condition);
2776 __ CmovX(kSpeculationPoisonRegister, xzr,
2777 FlagsConditionToCondition(condition));
2778 __ Csdb();
2779 }
2780
2781 void CodeGenerator::AssembleArchDeoptBranch(Instruction* instr,
2782 BranchInfo* branch) {
2783 AssembleArchBranch(instr, branch);
2784 }
2785
2786 void CodeGenerator::AssembleArchJump(RpoNumber target) {
2787 if (!IsNextInAssemblyOrder(target)) __ B(GetLabel(target));
2788 }
2789
2790 void CodeGenerator::AssembleArchTrap(Instruction* instr,
2791 FlagsCondition condition) {
2792 class OutOfLineTrap final : public OutOfLineCode {
2793 public:
2794 OutOfLineTrap(CodeGenerator* gen, Instruction* instr)
2795 : OutOfLineCode(gen), instr_(instr), gen_(gen) {}
2796 void Generate() final {
2797 Arm64OperandConverter i(gen_, instr_);
2798 TrapId trap_id =
2799 static_cast<TrapId>(i.InputInt32(instr_->InputCount() - 1));
2800 GenerateCallToTrap(trap_id);
2801 }
2802
2803 private:
2804 void GenerateCallToTrap(TrapId trap_id) {
2805 if (trap_id == TrapId::kInvalid) {
2806 // We cannot test calls to the runtime in cctest/test-run-wasm.
2807 // Therefore we emit a call to C here instead of a call to the runtime.
2808 __ CallCFunction(
2809 ExternalReference::wasm_call_trap_callback_for_testing(), 0);
2810 __ LeaveFrame(StackFrame::WASM);
2811 auto call_descriptor = gen_->linkage()->GetIncomingDescriptor();
2812 int pop_count =
2813 static_cast<int>(call_descriptor->StackParameterCount());
2814 pop_count += (pop_count & 1); // align
2815 __ Drop(pop_count);
2816 __ Ret();
2817 } else {
2818 gen_->AssembleSourcePosition(instr_);
2819 // A direct call to a wasm runtime stub defined in this module.
2820 // Just encode the stub index. This will be patched when the code
2821 // is added to the native module and copied into wasm code space.
2822 __ Call(static_cast<Address>(trap_id), RelocInfo::WASM_STUB_CALL);
2823 ReferenceMap* reference_map =
2824 gen_->zone()->New<ReferenceMap>(gen_->zone());
2825 gen_->RecordSafepoint(reference_map, Safepoint::kNoLazyDeopt);
2826 if (FLAG_debug_code) {
2827 // The trap code should never return.
2828 __ Brk(0);
2829 }
2830 }
2831 }
2832 Instruction* instr_;
2833 CodeGenerator* gen_;
2834 };
2835 auto ool = zone()->New<OutOfLineTrap>(this, instr);
2836 Label* tlabel = ool->entry();
2837 Condition cc = FlagsConditionToCondition(condition);
2838 __ B(cc, tlabel);
2839 }
2840
2841 // Assemble boolean materializations after this instruction.
2842 void CodeGenerator::AssembleArchBoolean(Instruction* instr,
2843 FlagsCondition condition) {
2844 Arm64OperandConverter i(this, instr);
2845
2846 // Materialize a full 64-bit 1 or 0 value. The result register is always the
2847 // last output of the instruction.
2848 DCHECK_NE(0u, instr->OutputCount());
2849 Register reg = i.OutputRegister(instr->OutputCount() - 1);
2850 Condition cc = FlagsConditionToCondition(condition);
2851 __ Cset(reg, cc);
2852 }
2853
2854 void CodeGenerator::AssembleArchBinarySearchSwitch(Instruction* instr) {
2855 Arm64OperandConverter i(this, instr);
2856 Register input = i.InputRegister32(0);
2857 std::vector<std::pair<int32_t, Label*>> cases;
2858 for (size_t index = 2; index < instr->InputCount(); index += 2) {
2859 cases.push_back({i.InputInt32(index + 0), GetLabel(i.InputRpo(index + 1))});
2860 }
2861 AssembleArchBinarySearchSwitchRange(input, i.InputRpo(1), cases.data(),
2862 cases.data() + cases.size());
2863 }
2864
2865 void CodeGenerator::AssembleArchTableSwitch(Instruction* instr) {
2866 Arm64OperandConverter i(this, instr);
2867 UseScratchRegisterScope scope(tasm());
2868 Register input = i.InputRegister32(0);
2869 Register temp = scope.AcquireX();
2870 size_t const case_count = instr->InputCount() - 2;
2871 Label table;
2872 __ Cmp(input, case_count);
2873 __ B(hs, GetLabel(i.InputRpo(1)));
2874 __ Adr(temp, &table);
2875 int entry_size_log2 = 2;
2876 #ifdef V8_ENABLE_CONTROL_FLOW_INTEGRITY
2877 ++entry_size_log2; // Account for BTI.
2878 #endif
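  // Each table entry is a single B instruction, i.e. 4 bytes, hence an
  // entry_size_log2 of 2; with control-flow integrity, JumpTarget() also
  // emits a BTI landing pad per entry, doubling the entry size.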
2879 __ Add(temp, temp, Operand(input, UXTW, entry_size_log2));
2880 __ Br(temp);
2881 {
2882 TurboAssembler::BlockPoolsScope block_pools(tasm(),
2883 case_count * kInstrSize);
2884 __ Bind(&table);
2885 for (size_t index = 0; index < case_count; ++index) {
2886 __ JumpTarget();
2887 __ B(GetLabel(i.InputRpo(index + 2)));
2888 }
2889 __ JumpTarget();
2890 }
2891 }
2892
2893 void CodeGenerator::FinishFrame(Frame* frame) {
2894 frame->AlignFrame(16);
2895 auto call_descriptor = linkage()->GetIncomingDescriptor();
2896
2897 // Save FP registers.
2898 CPURegList saves_fp = CPURegList(CPURegister::kVRegister, kDRegSizeInBits,
2899 call_descriptor->CalleeSavedFPRegisters());
2900 int saved_count = saves_fp.Count();
2901 if (saved_count != 0) {
2902 DCHECK(saves_fp.list() == CPURegList::GetCalleeSavedV().list());
2903 DCHECK_EQ(saved_count % 2, 0);
2904 frame->AllocateSavedCalleeRegisterSlots(saved_count *
2905 (kDoubleSize / kSystemPointerSize));
2906 }
2907
2908 CPURegList saves = CPURegList(CPURegister::kRegister, kXRegSizeInBits,
2909 call_descriptor->CalleeSavedRegisters());
2910 saved_count = saves.Count();
2911 if (saved_count != 0) {
2912 DCHECK_EQ(saved_count % 2, 0);
2913 frame->AllocateSavedCalleeRegisterSlots(saved_count);
2914 }
2915 }
2916
2917 void CodeGenerator::AssembleConstructFrame() {
2918 auto call_descriptor = linkage()->GetIncomingDescriptor();
2919 __ AssertSpAligned();
2920
2921 // The frame has been previously padded in CodeGenerator::FinishFrame().
2922 DCHECK_EQ(frame()->GetTotalFrameSlotCount() % 2, 0);
2923 int required_slots =
2924 frame()->GetTotalFrameSlotCount() - frame()->GetFixedSlotCount();
2925
2926 CPURegList saves = CPURegList(CPURegister::kRegister, kXRegSizeInBits,
2927 call_descriptor->CalleeSavedRegisters());
2928 DCHECK_EQ(saves.Count() % 2, 0);
2929 CPURegList saves_fp = CPURegList(CPURegister::kVRegister, kDRegSizeInBits,
2930 call_descriptor->CalleeSavedFPRegisters());
2931 DCHECK_EQ(saves_fp.Count() % 2, 0);
2932 // The number of slots for returns has to be even to ensure the correct stack
2933 // alignment.
2934 const int returns = RoundUp(frame()->GetReturnSlotCount(), 2);
2935
2936 if (frame_access_state()->has_frame()) {
2937 // Link the frame
2938 if (call_descriptor->IsJSFunctionCall()) {
2939 STATIC_ASSERT(InterpreterFrameConstants::kFixedFrameSize % 16 == 8);
2940 DCHECK_EQ(required_slots % 2, 1);
2941 __ Prologue();
2942 // Update required_slots count since we have just claimed one extra slot.
2943 STATIC_ASSERT(TurboAssembler::kExtraSlotClaimedByPrologue == 1);
2944 required_slots -= TurboAssembler::kExtraSlotClaimedByPrologue;
2945 } else {
2946 __ Push<TurboAssembler::kSignLR>(lr, fp);
2947 __ Mov(fp, sp);
2948 }
2949 unwinding_info_writer_.MarkFrameConstructed(__ pc_offset());
2950
2951 // Create OSR entry if applicable
2952 if (info()->is_osr()) {
2953 // TurboFan OSR-compiled functions cannot be entered directly.
2954 __ Abort(AbortReason::kShouldNotDirectlyEnterOsrFunction);
2955
2956 // Unoptimized code jumps directly to this entrypoint while the
2957 // unoptimized frame is still on the stack. Optimized code uses OSR values
2958 // directly from the unoptimized frame. Thus, all that needs to be done is
2959 // to allocate the remaining stack slots.
2960 if (FLAG_code_comments) __ RecordComment("-- OSR entrypoint --");
2961 osr_pc_offset_ = __ pc_offset();
2962 size_t unoptimized_frame_slots = osr_helper()->UnoptimizedFrameSlots();
2963 DCHECK(call_descriptor->IsJSFunctionCall());
2964 DCHECK_EQ(unoptimized_frame_slots % 2, 1);
2965 // One unoptimized frame slot has already been claimed when the actual
2966 // arguments count was pushed.
2967 required_slots -=
2968 unoptimized_frame_slots - TurboAssembler::kExtraSlotClaimedByPrologue;
2969 ResetSpeculationPoison();
2970 }
2971
2972 if (info()->IsWasm() && required_slots > 128) {
2973 // For WebAssembly functions with big frames we have to do the stack
2974 // overflow check before we construct the frame. Otherwise we may not
2975 // have enough space on the stack to call the runtime for the stack
2976 // overflow.
2977 Label done;
2978 // If the frame is bigger than the stack, we throw the stack overflow
2979 // exception unconditionally. Thereby we can avoid the integer overflow
2980 // check in the condition code.
2981 if (required_slots * kSystemPointerSize < FLAG_stack_size * 1024) {
2982 UseScratchRegisterScope scope(tasm());
2983 Register scratch = scope.AcquireX();
2984 __ Ldr(scratch, FieldMemOperand(
2985 kWasmInstanceRegister,
2986 WasmInstanceObject::kRealStackLimitAddressOffset));
2987 __ Ldr(scratch, MemOperand(scratch));
2988 __ Add(scratch, scratch, required_slots * kSystemPointerSize);
2989 __ Cmp(sp, scratch);
2990 __ B(hs, &done);
2991 }
2992
2993 {
2994 // Finish the frame that hasn't been fully built yet.
2995 UseScratchRegisterScope temps(tasm());
2996 Register scratch = temps.AcquireX();
2997 __ Mov(scratch,
2998 StackFrame::TypeToMarker(info()->GetOutputStackFrameType()));
2999 __ Push(scratch, kWasmInstanceRegister);
3000 }
3001
3002 __ Call(wasm::WasmCode::kWasmStackOverflow, RelocInfo::WASM_STUB_CALL);
3003 // We come from WebAssembly, there are no references for the GC.
3004 ReferenceMap* reference_map = zone()->New<ReferenceMap>(zone());
3005 RecordSafepoint(reference_map, Safepoint::kNoLazyDeopt);
3006 if (FLAG_debug_code) {
3007 __ Brk(0);
3008 }
3009 __ Bind(&done);
3010 }
3011
3012 // Skip callee-saved slots, which are pushed below.
3013 required_slots -= saves.Count();
3014 required_slots -= saves_fp.Count();
3015 required_slots -= returns;
3016
3017 // Build remainder of frame, including accounting for and filling-in
3018 // frame-specific header information, i.e. claiming the extra slot that
3019 // other platforms explicitly push for STUB (code object) frames and frames
3020 // recording their argument count.
3021 switch (call_descriptor->kind()) {
3022 case CallDescriptor::kCallJSFunction:
3023 __ Claim(required_slots);
3024 break;
3025 case CallDescriptor::kCallCodeObject: {
3026 UseScratchRegisterScope temps(tasm());
3027 Register scratch = temps.AcquireX();
3028 __ Mov(scratch,
3029 StackFrame::TypeToMarker(info()->GetOutputStackFrameType()));
3030 __ Push(scratch, padreg);
3031 // One of the extra slots has just been claimed when pushing the frame
3032 // type marker above. We also know that we have at least one slot to
3033 // claim here, as the typed frame has an odd number of fixed slots, and
3034 // all other parts of the total frame slots are even, leaving
3035 // {required_slots} to be odd.
3036 DCHECK_GE(required_slots, 1);
3037 __ Claim(required_slots - 1);
3038 } break;
3039 case CallDescriptor::kCallWasmFunction: {
3040 UseScratchRegisterScope temps(tasm());
3041 Register scratch = temps.AcquireX();
3042 __ Mov(scratch,
3043 StackFrame::TypeToMarker(info()->GetOutputStackFrameType()));
3044 __ Push(scratch, kWasmInstanceRegister);
3045 __ Claim(required_slots);
3046 } break;
3047 case CallDescriptor::kCallWasmImportWrapper:
3048 case CallDescriptor::kCallWasmCapiFunction: {
3049 UseScratchRegisterScope temps(tasm());
        __ LoadTaggedPointerField(
            kJSFunctionRegister,
            FieldMemOperand(kWasmInstanceRegister, Tuple2::kValue2Offset));
        __ LoadTaggedPointerField(
            kWasmInstanceRegister,
            FieldMemOperand(kWasmInstanceRegister, Tuple2::kValue1Offset));
        Register scratch = temps.AcquireX();
        __ Mov(scratch,
               StackFrame::TypeToMarker(info()->GetOutputStackFrameType()));
        __ Push(scratch, kWasmInstanceRegister);
        int extra_slots =
            call_descriptor->kind() == CallDescriptor::kCallWasmImportWrapper
                ? 0   // Import wrapper: none.
                : 1;  // C-API function: PC.
        __ Claim(required_slots + extra_slots);
      } break;
      case CallDescriptor::kCallAddress:
        if (info()->GetOutputStackFrameType() == StackFrame::C_WASM_ENTRY) {
          UseScratchRegisterScope temps(tasm());
          Register scratch = temps.AcquireX();
          __ Mov(scratch, StackFrame::TypeToMarker(StackFrame::C_WASM_ENTRY));
          __ Push(scratch, padreg);
          // The additional slot will be used for the saved c_entry_fp.
        }
        __ Claim(required_slots);
        break;
      default:
        UNREACHABLE();
    }
  }

  // Save FP registers.
  DCHECK_IMPLIES(saves_fp.Count() != 0,
                 saves_fp.list() == CPURegList::GetCalleeSavedV().list());
  __ PushCPURegList(saves_fp);

  // Save registers.
  DCHECK_IMPLIES(!saves.IsEmpty(),
                 saves.list() == CPURegList::GetCalleeSaved().list());
  __ PushCPURegList<TurboAssembler::kSignLR>(saves);

  if (returns != 0) {
    __ Claim(returns);
  }
}

void CodeGenerator::AssembleReturn(InstructionOperand* additional_pop_count) {
  auto call_descriptor = linkage()->GetIncomingDescriptor();

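  // Drop the return slots. The count is rounded up to an even number of
  // slots so that sp stays 16-byte aligned, matching the amount claimed
  // when the frame was built.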
  const int returns = RoundUp(frame()->GetReturnSlotCount(), 2);
  if (returns != 0) {
    __ Drop(returns);
  }

  // Restore registers.
  CPURegList saves = CPURegList(CPURegister::kRegister, kXRegSizeInBits,
                                call_descriptor->CalleeSavedRegisters());
  __ PopCPURegList<TurboAssembler::kAuthLR>(saves);

  // Restore fp registers.
  CPURegList saves_fp = CPURegList(CPURegister::kVRegister, kDRegSizeInBits,
                                   call_descriptor->CalleeSavedFPRegisters());
  __ PopCPURegList(saves_fp);

  unwinding_info_writer_.MarkBlockWillExit();

  // We might need x3 for scratch.
  DCHECK_EQ(0u, call_descriptor->CalleeSavedRegisters() & x3.bit());
  const int parameter_count =
      static_cast<int>(call_descriptor->StackParameterCount());
  Arm64OperandConverter g(this, nullptr);

  // {additional_pop_count} is only greater than zero if {parameter_count ==
  // 0}. Check RawMachineAssembler::PopAndReturn.
  if (parameter_count != 0) {
    if (additional_pop_count->IsImmediate()) {
      DCHECK_EQ(g.ToConstant(additional_pop_count).ToInt32(), 0);
    } else if (__ emit_debug_code()) {
      __ cmp(g.ToRegister(additional_pop_count), Operand(0));
      __ Assert(eq, AbortReason::kUnexpectedAdditionalPopValue);
    }
  }

  Register argc_reg = x3;
#ifdef V8_NO_ARGUMENTS_ADAPTOR
  // Functions with JS linkage have at least one parameter (the receiver).
  // If {parameter_count} == 0, the callee is a builtin with
  // kDontAdaptArgumentsSentinel, which pops its JS arguments itself.
  const bool drop_jsargs = frame_access_state()->has_frame() &&
                           call_descriptor->IsJSFunctionCall() &&
                           parameter_count != 0;
#else
  const bool drop_jsargs = false;
#endif
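  // When dropping JS arguments, the dynamic argument count must be read from
  // the frame before the frame is deconstructed below.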
  if (call_descriptor->IsCFunctionCall()) {
    AssembleDeconstructFrame();
  } else if (frame_access_state()->has_frame()) {
    // Canonicalize JSFunction return sites for now, unless they have a
    // variable number of stack slot pops.
    if (additional_pop_count->IsImmediate() &&
        g.ToConstant(additional_pop_count).ToInt32() == 0) {
      if (return_label_.is_bound()) {
        __ B(&return_label_);
        return;
      } else {
        __ Bind(&return_label_);
      }
    }
    if (drop_jsargs) {
      // Get the actual argument count.
      __ Ldr(argc_reg, MemOperand(fp, StandardFrameConstants::kArgCOffset));
    }
    AssembleDeconstructFrame();
  }

  if (drop_jsargs) {
    // We must pop all arguments from the stack (including the receiver).
    // This number of arguments is given by max(1 + argc_reg, parameter_count).
    Label argc_reg_has_final_count;
    __ Add(argc_reg, argc_reg, 1);  // Consider the receiver.
    if (parameter_count > 1) {
      __ Cmp(argc_reg, Operand(parameter_count));
      __ B(&argc_reg_has_final_count, ge);
      __ Mov(argc_reg, Operand(parameter_count));
      __ Bind(&argc_reg_has_final_count);
    }
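    // argc_reg now holds max(1 + actual argument count, parameter_count).
    // Worked example: with parameter_count == 3 (two declared parameters
    // plus the receiver), a call passing five arguments drops 1 + 5 == 6
    // slots, while a call passing one argument drops 3.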
    __ DropArguments(argc_reg);
  } else if (additional_pop_count->IsImmediate()) {
    int additional_count = g.ToConstant(additional_pop_count).ToInt32();
    __ DropArguments(parameter_count + additional_count);
  } else if (parameter_count == 0) {
    __ DropArguments(g.ToRegister(additional_pop_count));
  } else {
    // {additional_pop_count} is guaranteed to be zero if {parameter_count !=
    // 0}. Check RawMachineAssembler::PopAndReturn.
    __ DropArguments(parameter_count);
  }
  __ AssertSpAligned();
  __ Ret();
}

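// Flush any pending constant pool entries; after this point no further pool
// emission should be outstanding for this Code object.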
void CodeGenerator::FinishCode() { __ ForceConstantPoolEmissionWithoutJump(); }

void CodeGenerator::PrepareForDeoptimizationExits(
    ZoneDeque<DeoptimizationExit*>* exits) {
  __ ForceConstantPoolEmissionWithoutJump();
  // We are conservative here, assuming all deopts are lazy deopts.
  DCHECK_GE(Deoptimizer::kLazyDeoptExitSize,
            Deoptimizer::kNonLazyDeoptExitSize);
  __ CheckVeneerPool(
      false, false,
      static_cast<int>(exits->size()) * Deoptimizer::kLazyDeoptExitSize);
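  // Checking the veneer pool now, with a margin that covers every exit at
  // the conservative lazy size, ensures that no veneer pool can be emitted
  // in the middle of the fixed-size deoptimization exits below.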

  // Check which deopt kinds exist in this Code object, to avoid emitting jumps
  // to unused entries.
  bool saw_deopt_kind[kDeoptimizeKindCount] = {false};
  for (auto exit : *exits) {
    saw_deopt_kind[static_cast<int>(exit->kind())] = true;
  }

  // Emit the jumps to deoptimization entries.
  UseScratchRegisterScope scope(tasm());
  Register scratch = scope.AcquireX();
  STATIC_ASSERT(static_cast<int>(kFirstDeoptimizeKind) == 0);
  for (int i = 0; i < kDeoptimizeKindCount; i++) {
    if (!saw_deopt_kind[i]) continue;
    __ bind(&jump_deoptimization_entry_labels_[i]);
    __ LoadEntryFromBuiltinIndex(Deoptimizer::GetDeoptimizationEntry(
                                     isolate(), static_cast<DeoptimizeKind>(i)),
                                 scratch);
    __ Jump(scratch);
  }
}

void CodeGenerator::AssembleMove(InstructionOperand* source,
                                 InstructionOperand* destination) {
  Arm64OperandConverter g(this, nullptr);
  // Helper function to write the given constant to the dst register.
  auto MoveConstantToRegister = [&](Register dst, Constant src) {
    if (src.type() == Constant::kHeapObject) {
      Handle<HeapObject> src_object = src.ToHeapObject();
      RootIndex index;
      if (IsMaterializableFromRoot(src_object, &index)) {
        __ LoadRoot(dst, index);
      } else {
        __ Mov(dst, src_object);
      }
    } else if (src.type() == Constant::kCompressedHeapObject) {
      Handle<HeapObject> src_object = src.ToHeapObject();
      RootIndex index;
      if (IsMaterializableFromRoot(src_object, &index)) {
        __ LoadRoot(dst, index);
      } else {
        // TODO(v8:8977): Even though this mov happens on 32 bits (note the
        // .W()) and we are passing along the RelocInfo, we still haven't made
        // the address embedded in the code-stream actually be compressed.
        __ Mov(dst.W(),
               Immediate(src_object, RelocInfo::COMPRESSED_EMBEDDED_OBJECT));
      }
    } else {
      __ Mov(dst, g.ToImmediate(source));
    }
  };
  switch (MoveType::InferMove(source, destination)) {
    case MoveType::kRegisterToRegister:
      if (source->IsRegister()) {
        __ Mov(g.ToRegister(destination), g.ToRegister(source));
      } else if (source->IsFloatRegister() || source->IsDoubleRegister()) {
        __ Mov(g.ToDoubleRegister(destination), g.ToDoubleRegister(source));
      } else {
        DCHECK(source->IsSimd128Register());
        __ Mov(g.ToDoubleRegister(destination).Q(),
               g.ToDoubleRegister(source).Q());
      }
      return;
    case MoveType::kRegisterToStack: {
      MemOperand dst = g.ToMemOperand(destination, tasm());
      if (source->IsRegister()) {
        __ Str(g.ToRegister(source), dst);
      } else {
        VRegister src = g.ToDoubleRegister(source);
        if (source->IsFloatRegister() || source->IsDoubleRegister()) {
          __ Str(src, dst);
        } else {
          DCHECK(source->IsSimd128Register());
          __ Str(src.Q(), dst);
        }
      }
      return;
    }
    case MoveType::kStackToRegister: {
      MemOperand src = g.ToMemOperand(source, tasm());
      if (destination->IsRegister()) {
        __ Ldr(g.ToRegister(destination), src);
      } else {
        VRegister dst = g.ToDoubleRegister(destination);
        if (destination->IsFloatRegister() || destination->IsDoubleRegister()) {
          __ Ldr(dst, src);
        } else {
          DCHECK(destination->IsSimd128Register());
          __ Ldr(dst.Q(), src);
        }
      }
      return;
    }
    case MoveType::kStackToStack: {
      MemOperand src = g.ToMemOperand(source, tasm());
      MemOperand dst = g.ToMemOperand(destination, tasm());
      if (source->IsSimd128StackSlot()) {
        UseScratchRegisterScope scope(tasm());
        VRegister temp = scope.AcquireQ();
        __ Ldr(temp, src);
        __ Str(temp, dst);
      } else {
        UseScratchRegisterScope scope(tasm());
        Register temp = scope.AcquireX();
        __ Ldr(temp, src);
        __ Str(temp, dst);
      }
      return;
    }
    case MoveType::kConstantToRegister: {
      Constant src = g.ToConstant(source);
      if (destination->IsRegister()) {
        MoveConstantToRegister(g.ToRegister(destination), src);
      } else {
        VRegister dst = g.ToDoubleRegister(destination);
        if (destination->IsFloatRegister()) {
          __ Fmov(dst.S(), src.ToFloat32());
        } else {
          DCHECK(destination->IsDoubleRegister());
          __ Fmov(dst, src.ToFloat64().value());
        }
      }
      return;
    }
    case MoveType::kConstantToStack: {
      Constant src = g.ToConstant(source);
      MemOperand dst = g.ToMemOperand(destination, tasm());
      if (destination->IsStackSlot()) {
        UseScratchRegisterScope scope(tasm());
        Register temp = scope.AcquireX();
        MoveConstantToRegister(temp, src);
        __ Str(temp, dst);
      } else if (destination->IsFloatStackSlot()) {
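        // A float zero has an all-zero bit pattern, so it can be stored
        // straight from wzr without an FP scratch register.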
        if (bit_cast<int32_t>(src.ToFloat32()) == 0) {
          __ Str(wzr, dst);
        } else {
          UseScratchRegisterScope scope(tasm());
          VRegister temp = scope.AcquireS();
          __ Fmov(temp, src.ToFloat32());
          __ Str(temp, dst);
        }
      } else {
        DCHECK(destination->IsDoubleStackSlot());
        if (src.ToFloat64().AsUint64() == 0) {
          __ Str(xzr, dst);
        } else {
          UseScratchRegisterScope scope(tasm());
          VRegister temp = scope.AcquireD();
          __ Fmov(temp, src.ToFloat64().value());
          __ Str(temp, dst);
        }
      }
      return;
    }
  }
  UNREACHABLE();
}

void CodeGenerator::AssembleSwap(InstructionOperand* source,
                                 InstructionOperand* destination) {
  Arm64OperandConverter g(this, nullptr);
  switch (MoveType::InferSwap(source, destination)) {
    case MoveType::kRegisterToRegister:
      if (source->IsRegister()) {
        __ Swap(g.ToRegister(source), g.ToRegister(destination));
      } else {
        VRegister src = g.ToDoubleRegister(source);
        VRegister dst = g.ToDoubleRegister(destination);
        if (source->IsFloatRegister() || source->IsDoubleRegister()) {
          __ Swap(src, dst);
        } else {
          DCHECK(source->IsSimd128Register());
          __ Swap(src.Q(), dst.Q());
        }
      }
      return;
    case MoveType::kRegisterToStack: {
      UseScratchRegisterScope scope(tasm());
      MemOperand dst = g.ToMemOperand(destination, tasm());
      if (source->IsRegister()) {
        Register temp = scope.AcquireX();
        Register src = g.ToRegister(source);
        __ Mov(temp, src);
        __ Ldr(src, dst);
        __ Str(temp, dst);
      } else {
        UseScratchRegisterScope scope(tasm());
        VRegister src = g.ToDoubleRegister(source);
        if (source->IsFloatRegister() || source->IsDoubleRegister()) {
          VRegister temp = scope.AcquireD();
          __ Mov(temp, src);
          __ Ldr(src, dst);
          __ Str(temp, dst);
        } else {
          DCHECK(source->IsSimd128Register());
          VRegister temp = scope.AcquireQ();
          __ Mov(temp, src.Q());
          __ Ldr(src.Q(), dst);
          __ Str(temp, dst);
        }
      }
      return;
    }
    case MoveType::kStackToStack: {
      UseScratchRegisterScope scope(tasm());
      MemOperand src = g.ToMemOperand(source, tasm());
      MemOperand dst = g.ToMemOperand(destination, tasm());
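      // Both values must be live at once to swap them in place, hence two
      // scratch registers; the D temps are widened to Q for 128-bit slots.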
      VRegister temp_0 = scope.AcquireD();
      VRegister temp_1 = scope.AcquireD();
      if (source->IsSimd128StackSlot()) {
        __ Ldr(temp_0.Q(), src);
        __ Ldr(temp_1.Q(), dst);
        __ Str(temp_0.Q(), dst);
        __ Str(temp_1.Q(), src);
      } else {
        __ Ldr(temp_0, src);
        __ Ldr(temp_1, dst);
        __ Str(temp_0, dst);
        __ Str(temp_1, src);
      }
      return;
    }
    default:
      UNREACHABLE();
  }
}

void CodeGenerator::AssembleJumpTable(Label** targets, size_t target_count) {
  // On 64-bit ARM we emit the jump tables inline.
  UNREACHABLE();
}

#undef __

}  // namespace compiler
}  // namespace internal
}  // namespace v8