// Copyright 2017 the V8 project authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#ifndef V8_WASM_BASELINE_ARM64_LIFTOFF_ASSEMBLER_ARM64_H_
#define V8_WASM_BASELINE_ARM64_LIFTOFF_ASSEMBLER_ARM64_H_

#include "src/heap/memory-chunk.h"
#include "src/wasm/baseline/liftoff-assembler.h"

namespace v8 {
namespace internal {
namespace wasm {

namespace liftoff {

// Liftoff Frames.
//
//  slot      Frame
//       +--------------------+---------------------------
//  n+4  | optional padding slot to keep the stack 16 byte aligned.
//  n+3  |   parameter n      |
//  ...  |       ...          |
//   4   |   parameter 1      | or parameter 2
//   3   |   parameter 0      | or parameter 1
//   2   |  (result address)  | or parameter 0
//  -----+--------------------+---------------------------
//   1   | return addr (lr)   |
//   0   | previous frame (fp)|
//  -----+--------------------+  <-- frame ptr (fp)
//  -1   | 0xa: WASM          |
//  -2   |     instance       |
//  -----+--------------------+---------------------------
//  -3   |     slot 0         |   ^
//  -4   |     slot 1         |   |
//       |                    | Frame slots
//       |                    |   |
//       |                    |   v
//       | optional padding slot to keep the stack 16 byte aligned.
//  -----+--------------------+  <-- stack ptr (sp)
//

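// The WasmInstanceObject is spilled two system-pointer-sized slots below fp
// (see the frame layout above).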
constexpr int kInstanceOffset = 2 * kSystemPointerSize;

inline MemOperand GetStackSlot(int offset) { return MemOperand(fp, -offset); }

inline MemOperand GetInstanceOperand() { return GetStackSlot(kInstanceOffset); }

inline CPURegister GetRegFromType(const LiftoffRegister& reg, ValueType type) {
  switch (type.kind()) {
    case ValueType::kI32:
      return reg.gp().W();
    case ValueType::kI64:
    case ValueType::kRef:
    case ValueType::kOptRef:
      return reg.gp().X();
    case ValueType::kF32:
      return reg.fp().S();
    case ValueType::kF64:
      return reg.fp().D();
    case ValueType::kS128:
      return reg.fp().Q();
    default:
      UNREACHABLE();
  }
}

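// sp must stay 16-byte aligned on arm64, so pad a register list to an even
// number of registers before pushing/popping it: padreg for gp lists,
// fp_scratch for vector lists.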
inline CPURegList PadRegList(RegList list) {
  if ((base::bits::CountPopulation(list) & 1) != 0) list |= padreg.bit();
  return CPURegList(CPURegister::kRegister, kXRegSizeInBits, list);
}

inline CPURegList PadVRegList(RegList list) {
  if ((base::bits::CountPopulation(list) & 1) != 0) list |= fp_scratch.bit();
  return CPURegList(CPURegister::kVRegister, kQRegSizeInBits, list);
}

inline CPURegister AcquireByType(UseScratchRegisterScope* temps,
                                 ValueType type) {
  switch (type.kind()) {
    case ValueType::kI32:
      return temps->AcquireW();
    case ValueType::kI64:
      return temps->AcquireX();
    case ValueType::kF32:
      return temps->AcquireS();
    case ValueType::kF64:
      return temps->AcquireD();
    default:
      UNREACHABLE();
  }
}

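// Build a MemOperand for {addr} + {offset} + {offset_imm}. ARM64 addressing
// modes take either a register offset or an immediate offset, not both, so a
// nonzero immediate is first folded into a scratch register when a register
// offset is also present.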
template <typename T>
inline MemOperand GetMemOp(LiftoffAssembler* assm,
                           UseScratchRegisterScope* temps, Register addr,
                           Register offset, T offset_imm) {
  if (offset.is_valid()) {
    if (offset_imm == 0) return MemOperand(addr.X(), offset.W(), UXTW);
    Register tmp = temps->AcquireW();
    assm->Add(tmp, offset.W(), offset_imm);
    return MemOperand(addr.X(), tmp, UXTW);
  }
  return MemOperand(addr.X(), offset_imm);
}

enum class ShiftDirection : bool { kLeft, kRight };

enum class ShiftSign : bool { kSigned, kUnsigned };

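// NEON has no right-shift-by-register instruction; Sshl/Ushl shift each lane
// left by a (possibly negative) amount. A right shift is therefore emitted as
// a left shift by the negated, masked shift amount.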
template <ShiftDirection dir, ShiftSign sign = ShiftSign::kSigned>
inline void EmitSimdShift(LiftoffAssembler* assm, VRegister dst, VRegister lhs,
                          Register rhs, VectorFormat format) {
  DCHECK_IMPLIES(dir == ShiftDirection::kLeft, sign == ShiftSign::kSigned);
  DCHECK(dst.IsSameFormat(lhs));
  DCHECK_EQ(dst.LaneCount(), LaneCountFromFormat(format));

  UseScratchRegisterScope temps(assm);
  VRegister tmp = temps.AcquireV(format);
  Register shift = dst.Is2D() ? temps.AcquireX() : temps.AcquireW();
  int mask = LaneSizeInBitsFromFormat(format) - 1;
  assm->And(shift, rhs, mask);
  assm->Dup(tmp, shift);

  if (dir == ShiftDirection::kRight) {
    assm->Neg(tmp, tmp);
  }

  if (sign == ShiftSign::kSigned) {
    assm->Sshl(dst, lhs, tmp);
  } else {
    assm->Ushl(dst, lhs, tmp);
  }
}

template <VectorFormat format, ShiftSign sign>
inline void EmitSimdShiftRightImmediate(LiftoffAssembler* assm, VRegister dst,
                                        VRegister lhs, int32_t rhs) {
  // Sshr and Ushr do not allow shifts of 0, so check for that here.
  int mask = LaneSizeInBitsFromFormat(format) - 1;
  int32_t shift = rhs & mask;
  if (!shift) {
    if (dst != lhs) {
      assm->Mov(dst, lhs);
    }
    return;
  }

  if (sign == ShiftSign::kSigned) {
    assm->Sshr(dst, lhs, rhs & mask);
  } else {
    assm->Ushr(dst, lhs, rhs & mask);
  }
}

inline void EmitAnyTrue(LiftoffAssembler* assm, LiftoffRegister dst,
                        LiftoffRegister src) {
  // AnyTrue does not depend on the number of lanes, so we can use V4S for all.
  UseScratchRegisterScope scope(assm);
  VRegister temp = scope.AcquireV(kFormatS);
  assm->Umaxv(temp, src.fp().V4S());
  assm->Umov(dst.gp().W(), temp, 0);
  assm->Cmp(dst.gp().W(), 0);
  assm->Cset(dst.gp().W(), ne);
}

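// AllTrue: the unsigned minimum across all lanes is nonzero iff every lane is
// nonzero.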
inline void EmitAllTrue(LiftoffAssembler* assm, LiftoffRegister dst,
                        LiftoffRegister src, VectorFormat format) {
  UseScratchRegisterScope scope(assm);
  VRegister temp = scope.AcquireV(ScalarFormatFromFormat(format));
  assm->Uminv(temp, VRegister::Create(src.fp().code(), format));
  assm->Umov(dst.gp().W(), temp, 0);
  assm->Cmp(dst.gp().W(), 0);
  assm->Cset(dst.gp().W(), ne);
}

}  // namespace liftoff

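// Emit a single placeholder instruction (sub sp, sp, #0). Once the frame size
// is known, PatchPrepareStackFrame patches it to allocate the real frame.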
int LiftoffAssembler::PrepareStackFrame() {
  int offset = pc_offset();
  InstructionAccurateScope scope(this, 1);
  sub(sp, sp, 0);
  return offset;
}

void LiftoffAssembler::PrepareTailCall(int num_callee_stack_params,
                                       int stack_param_delta) {
  UseScratchRegisterScope temps(this);
  temps.Exclude(x16, x17);

  // This is the previous stack pointer value (before we push the lr and the
  // fp). We need to keep it to authenticate the lr and adjust the new stack
  // pointer afterwards.
  Add(x16, fp, 16);

  // Load the fp and lr of the old frame, they will be pushed in the new frame
  // during the actual call.
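  // With control-flow integrity enabled, the saved lr carries a pointer
  // authentication code; it must be authenticated (Autib1716, keyed on
  // x16/x17) before it can be used as a return address.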
#ifdef V8_ENABLE_CONTROL_FLOW_INTEGRITY
  Ldp(fp, x17, MemOperand(fp));
  Autib1716();
  Mov(lr, x17);
#else
  Ldp(fp, lr, MemOperand(fp));
#endif

  temps.Include(x17);

  Register scratch = temps.AcquireX();

  // Shift the whole frame upwards, except for fp and lr.
  int slot_count = num_callee_stack_params;
  for (int i = slot_count - 1; i >= 0; --i) {
    ldr(scratch, MemOperand(sp, i * 8));
    str(scratch, MemOperand(x16, (i - stack_param_delta) * 8));
  }

  // Set the new stack pointer.
  Sub(sp, x16, stack_param_delta * 8);
}

void LiftoffAssembler::PatchPrepareStackFrame(int offset, int frame_size) {
  static_assert(kStackSlotSize == kXRegSize,
                "kStackSlotSize must equal kXRegSize");
  // The stack pointer is required to be quadword aligned.
  // Misalignment will cause a stack alignment fault.
  frame_size = RoundUp(frame_size, kQuadWordSizeInBytes);
  if (!IsImmAddSub(frame_size)) {
    // Round the frame size up to a page to try to fit an add/sub immediate.
    frame_size = RoundUp(frame_size, 0x1000);
    if (!IsImmAddSub(frame_size)) {
      // Stack greater than 4M! Because this is quite an improbable case, we
      // just fall back to TurboFan.
      bailout(kOtherReason, "Stack too big");
      return;
    }
  }
#ifdef USE_SIMULATOR
  // When using the simulator, deal with Liftoff allocating the stack before
  // checking it.
  // TODO(arm): Remove this when the stack check mechanism is updated.
  if (frame_size > KB / 2) {
    bailout(kOtherReason,
            "Stack limited to 512 bytes to avoid a bug in StackCheck");
    return;
  }
#endif
  PatchingAssembler patching_assembler(AssemblerOptions{},
                                       buffer_start_ + offset, 1);
#if V8_OS_WIN
  if (frame_size > kStackPageSize) {
    // Generate OOL code (at the end of the function, where the current
    // assembler is pointing) to do the explicit stack limit check (see
    // https://docs.microsoft.com/en-us/previous-versions/visualstudio/
    // visual-studio-6.0/aa227153(v=vs.60)).
    // At the function start, emit a jump to that OOL code (from {offset} to
    // {pc_offset()}).
    int ool_offset = pc_offset() - offset;
    patching_assembler.b(ool_offset >> kInstrSizeLog2);

    // Now generate the OOL code.
    Claim(frame_size, 1);
    // Jump back to the start of the function (from {pc_offset()} to {offset +
    // kInstrSize}).
    int func_start_offset = offset + kInstrSize - pc_offset();
    b(func_start_offset >> kInstrSizeLog2);
    return;
  }
#endif
  patching_assembler.PatchSubSp(frame_size);
}

void LiftoffAssembler::FinishCode() { ForceConstantPoolEmissionWithoutJump(); }

void LiftoffAssembler::AbortCompilation() { AbortedCodeGeneration(); }

// static
constexpr int LiftoffAssembler::StaticStackFrameSize() {
  return liftoff::kInstanceOffset;
}

int LiftoffAssembler::SlotSizeForType(ValueType type) {
  // TODO(zhin): Unaligned accesses typically take additional cycles; we should
  // do some performance testing to see how big an effect they have.
  switch (type.kind()) {
    case ValueType::kS128:
      return type.element_size_bytes();
    default:
      return kStackSlotSize;
  }
}

bool LiftoffAssembler::NeedsAlignment(ValueType type) {
  return type.kind() == ValueType::kS128 || type.is_reference_type();
}

void LiftoffAssembler::LoadConstant(LiftoffRegister reg, WasmValue value,
                                    RelocInfo::Mode rmode) {
  switch (value.type().kind()) {
    case ValueType::kI32:
      Mov(reg.gp().W(), Immediate(value.to_i32(), rmode));
      break;
    case ValueType::kI64:
      Mov(reg.gp().X(), Immediate(value.to_i64(), rmode));
      break;
    case ValueType::kF32:
      Fmov(reg.fp().S(), value.to_f32_boxed().get_scalar());
      break;
    case ValueType::kF64:
      Fmov(reg.fp().D(), value.to_f64_boxed().get_scalar());
      break;
    default:
      UNREACHABLE();
  }
}

void LiftoffAssembler::LoadFromInstance(Register dst, int offset, int size) {
  DCHECK_LE(0, offset);
  Ldr(dst, liftoff::GetInstanceOperand());
  DCHECK(size == 4 || size == 8);
  if (size == 4) {
    Ldr(dst.W(), MemOperand(dst, offset));
  } else {
    Ldr(dst, MemOperand(dst, offset));
  }
}

void LiftoffAssembler::LoadTaggedPointerFromInstance(Register dst, int offset) {
  DCHECK_LE(0, offset);
  Ldr(dst, liftoff::GetInstanceOperand());
  LoadTaggedPointerField(dst, MemOperand(dst, offset));
}

void LiftoffAssembler::SpillInstance(Register instance) {
  Str(instance, liftoff::GetInstanceOperand());
}

void LiftoffAssembler::FillInstanceInto(Register dst) {
  Ldr(dst, liftoff::GetInstanceOperand());
}

void LiftoffAssembler::LoadTaggedPointer(Register dst, Register src_addr,
                                         Register offset_reg,
                                         int32_t offset_imm,
                                         LiftoffRegList pinned) {
  UseScratchRegisterScope temps(this);
  MemOperand src_op =
      liftoff::GetMemOp(this, &temps, src_addr, offset_reg, offset_imm);
  LoadTaggedPointerField(dst, src_op);
}

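// Store a tagged pointer, with an inline write-barrier fast path: the
// record-write stub is only called if the destination page tracks incoming
// pointers, the stored value is not a Smi, and the value's page tracks
// outgoing pointers.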
void LiftoffAssembler::StoreTaggedPointer(Register dst_addr,
                                          int32_t offset_imm,
                                          LiftoffRegister src,
                                          LiftoffRegList pinned) {
  // Store the value.
  MemOperand dst_op(dst_addr, offset_imm);
  StoreTaggedField(src.gp(), dst_op);
  // The write barrier.
  Label write_barrier;
  Label exit;
  CheckPageFlag(dst_addr, MemoryChunk::kPointersFromHereAreInterestingMask, eq,
                &write_barrier);
  b(&exit);
  bind(&write_barrier);
  JumpIfSmi(src.gp(), &exit);
  if (COMPRESS_POINTERS_BOOL) {
    DecompressTaggedPointer(src.gp(), src.gp());
  }
  CheckPageFlag(src.gp(), MemoryChunk::kPointersToHereAreInterestingMask, ne,
                &exit);
  CallRecordWriteStub(dst_addr, Operand(offset_imm), EMIT_REMEMBERED_SET,
                      kSaveFPRegs, wasm::WasmCode::kRecordWrite);
  bind(&exit);
}

void LiftoffAssembler::Load(LiftoffRegister dst, Register src_addr,
                            Register offset_reg, uint32_t offset_imm,
                            LoadType type, LiftoffRegList pinned,
                            uint32_t* protected_load_pc, bool is_load_mem) {
  UseScratchRegisterScope temps(this);
  MemOperand src_op =
      liftoff::GetMemOp(this, &temps, src_addr, offset_reg, offset_imm);
  if (protected_load_pc) *protected_load_pc = pc_offset();
  switch (type.value()) {
    case LoadType::kI32Load8U:
    case LoadType::kI64Load8U:
      Ldrb(dst.gp().W(), src_op);
      break;
    case LoadType::kI32Load8S:
      Ldrsb(dst.gp().W(), src_op);
      break;
    case LoadType::kI64Load8S:
      Ldrsb(dst.gp().X(), src_op);
      break;
    case LoadType::kI32Load16U:
    case LoadType::kI64Load16U:
      Ldrh(dst.gp().W(), src_op);
      break;
    case LoadType::kI32Load16S:
      Ldrsh(dst.gp().W(), src_op);
      break;
    case LoadType::kI64Load16S:
      Ldrsh(dst.gp().X(), src_op);
      break;
    case LoadType::kI32Load:
    case LoadType::kI64Load32U:
      Ldr(dst.gp().W(), src_op);
      break;
    case LoadType::kI64Load32S:
      Ldrsw(dst.gp().X(), src_op);
      break;
    case LoadType::kI64Load:
      Ldr(dst.gp().X(), src_op);
      break;
    case LoadType::kF32Load:
      Ldr(dst.fp().S(), src_op);
      break;
    case LoadType::kF64Load:
      Ldr(dst.fp().D(), src_op);
      break;
    case LoadType::kS128Load:
      Ldr(dst.fp().Q(), src_op);
      break;
  }
}

void LiftoffAssembler::Store(Register dst_addr, Register offset_reg,
                             uint32_t offset_imm, LiftoffRegister src,
                             StoreType type, LiftoffRegList pinned,
                             uint32_t* protected_store_pc, bool is_store_mem) {
  UseScratchRegisterScope temps(this);
  MemOperand dst_op =
      liftoff::GetMemOp(this, &temps, dst_addr, offset_reg, offset_imm);
  if (protected_store_pc) *protected_store_pc = pc_offset();
  switch (type.value()) {
    case StoreType::kI32Store8:
    case StoreType::kI64Store8:
      Strb(src.gp().W(), dst_op);
      break;
    case StoreType::kI32Store16:
    case StoreType::kI64Store16:
      Strh(src.gp().W(), dst_op);
      break;
    case StoreType::kI32Store:
    case StoreType::kI64Store32:
      Str(src.gp().W(), dst_op);
      break;
    case StoreType::kI64Store:
      Str(src.gp().X(), dst_op);
      break;
    case StoreType::kF32Store:
      Str(src.fp().S(), dst_op);
      break;
    case StoreType::kF64Store:
      Str(src.fp().D(), dst_op);
      break;
    case StoreType::kS128Store:
      Str(src.fp().Q(), dst_op);
      break;
  }
}

namespace liftoff {
#define __ lasm->

inline Register CalculateActualAddress(LiftoffAssembler* lasm,
                                       Register addr_reg, Register offset_reg,
                                       int32_t offset_imm,
                                       Register result_reg) {
  DCHECK_NE(offset_reg, no_reg);
  DCHECK_NE(addr_reg, no_reg);
  __ Add(result_reg, addr_reg, Operand(offset_reg));
  if (offset_imm != 0) {
    __ Add(result_reg, result_reg, Operand(offset_imm));
  }
  return result_reg;
}

enum class Binop { kAdd, kSub, kAnd, kOr, kXor, kExchange };

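// Emit a load-linked/store-conditional retry loop: load-acquire the old value
// (ldaxr*), combine it with {value} according to {op}, then store-release
// (stlxr*) and retry until the exclusive store succeeds.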
inline void AtomicBinop(LiftoffAssembler* lasm, Register dst_addr,
                        Register offset_reg, uint32_t offset_imm,
                        LiftoffRegister value, LiftoffRegister result,
                        StoreType type, Binop op) {
  LiftoffRegList pinned =
      LiftoffRegList::ForRegs(dst_addr, offset_reg, value, result);
  Register store_result = pinned.set(__ GetUnusedRegister(kGpReg, pinned)).gp();

  // Make sure that {result} is unique.
  Register result_reg = result.gp();
  if (result_reg == value.gp() || result_reg == dst_addr ||
      result_reg == offset_reg) {
    result_reg = __ GetUnusedRegister(kGpReg, pinned).gp();
  }

  UseScratchRegisterScope temps(lasm);
  Register actual_addr = liftoff::CalculateActualAddress(
      lasm, dst_addr, offset_reg, offset_imm, temps.AcquireX());

  // Allocate an additional {temp} register to hold the result that should be
  // stored to memory. Note that {temp} and {store_result} are not allowed to
  // be the same register.
  Register temp = temps.AcquireX();

  Label retry;
  __ Bind(&retry);
  switch (type.value()) {
    case StoreType::kI64Store8:
    case StoreType::kI32Store8:
      __ ldaxrb(result_reg.W(), actual_addr);
      break;
    case StoreType::kI64Store16:
    case StoreType::kI32Store16:
      __ ldaxrh(result_reg.W(), actual_addr);
      break;
    case StoreType::kI64Store32:
    case StoreType::kI32Store:
      __ ldaxr(result_reg.W(), actual_addr);
      break;
    case StoreType::kI64Store:
      __ ldaxr(result_reg.X(), actual_addr);
      break;
    default:
      UNREACHABLE();
  }

  switch (op) {
    case Binop::kAdd:
      __ add(temp, result_reg, value.gp());
      break;
    case Binop::kSub:
      __ sub(temp, result_reg, value.gp());
      break;
    case Binop::kAnd:
      __ and_(temp, result_reg, value.gp());
      break;
    case Binop::kOr:
      __ orr(temp, result_reg, value.gp());
      break;
    case Binop::kXor:
      __ eor(temp, result_reg, value.gp());
      break;
    case Binop::kExchange:
      __ mov(temp, value.gp());
      break;
  }

  switch (type.value()) {
    case StoreType::kI64Store8:
    case StoreType::kI32Store8:
      __ stlxrb(store_result.W(), temp.W(), actual_addr);
      break;
    case StoreType::kI64Store16:
    case StoreType::kI32Store16:
      __ stlxrh(store_result.W(), temp.W(), actual_addr);
      break;
    case StoreType::kI64Store32:
    case StoreType::kI32Store:
      __ stlxr(store_result.W(), temp.W(), actual_addr);
      break;
    case StoreType::kI64Store:
      __ stlxr(store_result.W(), temp.X(), actual_addr);
      break;
    default:
      UNREACHABLE();
  }

  __ Cbnz(store_result.W(), &retry);

  if (result_reg != result.gp()) {
    __ mov(result.gp(), result_reg);
  }
}

#undef __
}  // namespace liftoff

void LiftoffAssembler::AtomicLoad(LiftoffRegister dst, Register src_addr,
                                  Register offset_reg, uint32_t offset_imm,
                                  LoadType type, LiftoffRegList pinned) {
  UseScratchRegisterScope temps(this);
  Register src_reg = liftoff::CalculateActualAddress(
      this, src_addr, offset_reg, offset_imm, temps.AcquireX());
  switch (type.value()) {
    case LoadType::kI32Load8U:
    case LoadType::kI64Load8U:
      Ldarb(dst.gp().W(), src_reg);
      return;
    case LoadType::kI32Load16U:
    case LoadType::kI64Load16U:
      Ldarh(dst.gp().W(), src_reg);
      return;
    case LoadType::kI32Load:
    case LoadType::kI64Load32U:
      Ldar(dst.gp().W(), src_reg);
      return;
    case LoadType::kI64Load:
      Ldar(dst.gp().X(), src_reg);
      return;
    default:
      UNREACHABLE();
  }
}

void LiftoffAssembler::AtomicStore(Register dst_addr, Register offset_reg,
                                   uint32_t offset_imm, LiftoffRegister src,
                                   StoreType type, LiftoffRegList pinned) {
  UseScratchRegisterScope temps(this);
  Register dst_reg = liftoff::CalculateActualAddress(
      this, dst_addr, offset_reg, offset_imm, temps.AcquireX());
  switch (type.value()) {
    case StoreType::kI64Store8:
    case StoreType::kI32Store8:
      Stlrb(src.gp().W(), dst_reg);
      return;
    case StoreType::kI64Store16:
    case StoreType::kI32Store16:
      Stlrh(src.gp().W(), dst_reg);
      return;
    case StoreType::kI64Store32:
    case StoreType::kI32Store:
      Stlr(src.gp().W(), dst_reg);
      return;
    case StoreType::kI64Store:
      Stlr(src.gp().X(), dst_reg);
      return;
    default:
      UNREACHABLE();
  }
}

void LiftoffAssembler::AtomicAdd(Register dst_addr, Register offset_reg,
                                 uint32_t offset_imm, LiftoffRegister value,
                                 LiftoffRegister result, StoreType type) {
  liftoff::AtomicBinop(this, dst_addr, offset_reg, offset_imm, value, result,
                       type, liftoff::Binop::kAdd);
}

void LiftoffAssembler::AtomicSub(Register dst_addr, Register offset_reg,
                                 uint32_t offset_imm, LiftoffRegister value,
                                 LiftoffRegister result, StoreType type) {
  liftoff::AtomicBinop(this, dst_addr, offset_reg, offset_imm, value, result,
                       type, liftoff::Binop::kSub);
}

void LiftoffAssembler::AtomicAnd(Register dst_addr, Register offset_reg,
                                 uint32_t offset_imm, LiftoffRegister value,
                                 LiftoffRegister result, StoreType type) {
  liftoff::AtomicBinop(this, dst_addr, offset_reg, offset_imm, value, result,
                       type, liftoff::Binop::kAnd);
}

void LiftoffAssembler::AtomicOr(Register dst_addr, Register offset_reg,
                                uint32_t offset_imm, LiftoffRegister value,
                                LiftoffRegister result, StoreType type) {
  liftoff::AtomicBinop(this, dst_addr, offset_reg, offset_imm, value, result,
                       type, liftoff::Binop::kOr);
}

void LiftoffAssembler::AtomicXor(Register dst_addr, Register offset_reg,
                                 uint32_t offset_imm, LiftoffRegister value,
                                 LiftoffRegister result, StoreType type) {
  liftoff::AtomicBinop(this, dst_addr, offset_reg, offset_imm, value, result,
                       type, liftoff::Binop::kXor);
}

void LiftoffAssembler::AtomicExchange(Register dst_addr, Register offset_reg,
                                      uint32_t offset_imm,
                                      LiftoffRegister value,
                                      LiftoffRegister result, StoreType type) {
  liftoff::AtomicBinop(this, dst_addr, offset_reg, offset_imm, value, result,
                       type, liftoff::Binop::kExchange);
}

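// Compare-exchange via an ldaxr/stlxr retry loop: load-acquire the current
// value, exit if it differs from {expected}, otherwise try to store-release
// {new_value}, retrying if the exclusive store fails.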
void LiftoffAssembler::AtomicCompareExchange(
    Register dst_addr, Register offset_reg, uint32_t offset_imm,
    LiftoffRegister expected, LiftoffRegister new_value, LiftoffRegister result,
    StoreType type) {
  LiftoffRegList pinned =
      LiftoffRegList::ForRegs(dst_addr, offset_reg, expected, new_value);

  Register result_reg = result.gp();
  if (pinned.has(result)) {
    result_reg = GetUnusedRegister(kGpReg, pinned).gp();
  }

  UseScratchRegisterScope temps(this);

  Register actual_addr = liftoff::CalculateActualAddress(
      this, dst_addr, offset_reg, offset_imm, temps.AcquireX());

  Register store_result = temps.AcquireW();

  Label retry;
  Label done;
  Bind(&retry);
  switch (type.value()) {
    case StoreType::kI64Store8:
    case StoreType::kI32Store8:
      ldaxrb(result_reg.W(), actual_addr);
      Cmp(result_reg.W(), Operand(expected.gp().W(), UXTB));
      B(ne, &done);
      stlxrb(store_result.W(), new_value.gp().W(), actual_addr);
      break;
    case StoreType::kI64Store16:
    case StoreType::kI32Store16:
      ldaxrh(result_reg.W(), actual_addr);
      Cmp(result_reg.W(), Operand(expected.gp().W(), UXTH));
      B(ne, &done);
      stlxrh(store_result.W(), new_value.gp().W(), actual_addr);
      break;
    case StoreType::kI64Store32:
    case StoreType::kI32Store:
      ldaxr(result_reg.W(), actual_addr);
      Cmp(result_reg.W(), Operand(expected.gp().W(), UXTW));
      B(ne, &done);
      stlxr(store_result.W(), new_value.gp().W(), actual_addr);
      break;
    case StoreType::kI64Store:
      ldaxr(result_reg.X(), actual_addr);
      Cmp(result_reg.X(), Operand(expected.gp().X(), UXTX));
      B(ne, &done);
      stlxr(store_result.W(), new_value.gp().X(), actual_addr);
      break;
    default:
      UNREACHABLE();
  }

  Cbnz(store_result.W(), &retry);
  Bind(&done);

  if (result_reg != result.gp()) {
    mov(result.gp(), result_reg);
  }
}

void LiftoffAssembler::AtomicFence() { Dmb(InnerShareable, BarrierAll); }

void LiftoffAssembler::LoadCallerFrameSlot(LiftoffRegister dst,
                                           uint32_t caller_slot_idx,
                                           ValueType type) {
  int32_t offset = (caller_slot_idx + 1) * LiftoffAssembler::kStackSlotSize;
  Ldr(liftoff::GetRegFromType(dst, type), MemOperand(fp, offset));
}

void LiftoffAssembler::StoreCallerFrameSlot(LiftoffRegister src,
                                            uint32_t caller_slot_idx,
                                            ValueType type) {
  int32_t offset = (caller_slot_idx + 1) * LiftoffAssembler::kStackSlotSize;
  Str(liftoff::GetRegFromType(src, type), MemOperand(fp, offset));
}

void LiftoffAssembler::LoadReturnStackSlot(LiftoffRegister dst, int offset,
                                           ValueType type) {
  Ldr(liftoff::GetRegFromType(dst, type), MemOperand(sp, offset));
}

void LiftoffAssembler::MoveStackValue(uint32_t dst_offset, uint32_t src_offset,
                                      ValueType type) {
  UseScratchRegisterScope temps(this);
  CPURegister scratch = liftoff::AcquireByType(&temps, type);
  Ldr(scratch, liftoff::GetStackSlot(src_offset));
  Str(scratch, liftoff::GetStackSlot(dst_offset));
}

void LiftoffAssembler::Move(Register dst, Register src, ValueType type) {
  if (type == kWasmI32) {
    Mov(dst.W(), src.W());
  } else {
    DCHECK(kWasmI64 == type || type.is_reference_type());
    Mov(dst.X(), src.X());
  }
}

void LiftoffAssembler::Move(DoubleRegister dst, DoubleRegister src,
                            ValueType type) {
  if (type == kWasmF32) {
    Fmov(dst.S(), src.S());
  } else if (type == kWasmF64) {
    Fmov(dst.D(), src.D());
  } else {
    DCHECK_EQ(kWasmS128, type);
    Mov(dst.Q(), src.Q());
  }
}

void LiftoffAssembler::Spill(int offset, LiftoffRegister reg, ValueType type) {
  RecordUsedSpillOffset(offset);
  MemOperand dst = liftoff::GetStackSlot(offset);
  Str(liftoff::GetRegFromType(reg, type), dst);
}

void LiftoffAssembler::Spill(int offset, WasmValue value) {
  RecordUsedSpillOffset(offset);
  MemOperand dst = liftoff::GetStackSlot(offset);
  UseScratchRegisterScope temps(this);
  CPURegister src = CPURegister::no_reg();
  switch (value.type().kind()) {
    case ValueType::kI32:
      if (value.to_i32() == 0) {
        src = wzr;
      } else {
        src = temps.AcquireW();
        Mov(src.W(), value.to_i32());
      }
      break;
    case ValueType::kI64:
      if (value.to_i64() == 0) {
        src = xzr;
      } else {
        src = temps.AcquireX();
        Mov(src.X(), value.to_i64());
      }
      break;
    default:
      // We do not track f32 and f64 constants, hence they are unreachable.
      UNREACHABLE();
  }
  Str(src, dst);
}

void LiftoffAssembler::Fill(LiftoffRegister reg, int offset, ValueType type) {
  MemOperand src = liftoff::GetStackSlot(offset);
  Ldr(liftoff::GetRegFromType(reg, type), src);
}

void LiftoffAssembler::FillI64Half(Register, int offset, RegPairHalf) {
  UNREACHABLE();
}

void LiftoffAssembler::FillStackSlotsWithZero(int start, int size) {
  // Zero 'size' bytes *below* 'start'; the byte at offset 'start' itself is
  // untouched.
  DCHECK_LE(0, start);
  DCHECK_LT(0, size);
  DCHECK_EQ(0, size % 4);
  RecordUsedSpillOffset(start + size);

  int max_stp_offset = -start - size;
  // We check IsImmLSUnscaled(-start-12) because str only allows an unscaled
  // 9-bit immediate offset [-256,256]. If start is large enough, which can
  // happen when a function has many params (>= 32 i64), str cannot be encoded
  // properly. The Str macro would work but generates more instructions, so we
  // fall back to the general case below.
  if (size <= 12 * kStackSlotSize &&
      IsImmLSPair(max_stp_offset, kXRegSizeLog2) &&
      IsImmLSUnscaled(-start - 12)) {
    // Special straight-line code for up to 12 slots. Generates one
    // instruction per two slots (<= 7 instructions total).
    STATIC_ASSERT(kStackSlotSize == kSystemPointerSize);
    uint32_t remainder = size;
    for (; remainder >= 2 * kStackSlotSize; remainder -= 2 * kStackSlotSize) {
      stp(xzr, xzr, liftoff::GetStackSlot(start + remainder));
    }

    DCHECK_GE(12, remainder);
    switch (remainder) {
      case 12:
        str(xzr, liftoff::GetStackSlot(start + remainder));
        str(wzr, liftoff::GetStackSlot(start + remainder - 8));
        break;
      case 8:
        str(xzr, liftoff::GetStackSlot(start + remainder));
        break;
      case 4:
        str(wzr, liftoff::GetStackSlot(start + remainder));
        break;
      case 0:
        break;
      default:
        UNREACHABLE();
    }
  } else {
    // General case for bigger counts (5-8 instructions).
    UseScratchRegisterScope temps(this);
    Register address_reg = temps.AcquireX();
    // This {Sub} might use another temp register if the offset is too large.
    Sub(address_reg, fp, start + size);
    Register count_reg = temps.AcquireX();
    Mov(count_reg, size / 4);

    Label loop;
    bind(&loop);
    sub(count_reg, count_reg, 1);
    str(wzr, MemOperand(address_reg, kSystemPointerSize / 2, PostIndex));
    cbnz(count_reg, &loop);
  }
}


#define I32_BINOP(name, instruction)                             \
  void LiftoffAssembler::emit_##name(Register dst, Register lhs, \
                                     Register rhs) {             \
    instruction(dst.W(), lhs.W(), rhs.W());                      \
  }
#define I32_BINOP_I(name, instruction)                              \
  I32_BINOP(name, instruction)                                      \
  void LiftoffAssembler::emit_##name##i(Register dst, Register lhs, \
                                        int32_t imm) {              \
    instruction(dst.W(), lhs.W(), Immediate(imm));                  \
  }
#define I64_BINOP(name, instruction)                                           \
  void LiftoffAssembler::emit_##name(LiftoffRegister dst, LiftoffRegister lhs, \
                                     LiftoffRegister rhs) {                    \
    instruction(dst.gp().X(), lhs.gp().X(), rhs.gp().X());                     \
  }
#define I64_BINOP_I(name, instruction)                                      \
  I64_BINOP(name, instruction)                                              \
  void LiftoffAssembler::emit_##name##i(LiftoffRegister dst,                \
                                        LiftoffRegister lhs, int32_t imm) { \
    instruction(dst.gp().X(), lhs.gp().X(), imm);                           \
  }
#define FP32_BINOP(name, instruction)                                        \
  void LiftoffAssembler::emit_##name(DoubleRegister dst, DoubleRegister lhs, \
                                     DoubleRegister rhs) {                   \
    instruction(dst.S(), lhs.S(), rhs.S());                                  \
  }
#define FP32_UNOP(name, instruction)                                           \
  void LiftoffAssembler::emit_##name(DoubleRegister dst, DoubleRegister src) { \
    instruction(dst.S(), src.S());                                             \
  }
#define FP32_UNOP_RETURN_TRUE(name, instruction)                               \
  bool LiftoffAssembler::emit_##name(DoubleRegister dst, DoubleRegister src) { \
    instruction(dst.S(), src.S());                                             \
    return true;                                                               \
  }
#define FP64_BINOP(name, instruction)                                        \
  void LiftoffAssembler::emit_##name(DoubleRegister dst, DoubleRegister lhs, \
                                     DoubleRegister rhs) {                   \
    instruction(dst.D(), lhs.D(), rhs.D());                                  \
  }
#define FP64_UNOP(name, instruction)                                           \
  void LiftoffAssembler::emit_##name(DoubleRegister dst, DoubleRegister src) { \
    instruction(dst.D(), src.D());                                             \
  }
#define FP64_UNOP_RETURN_TRUE(name, instruction)                               \
  bool LiftoffAssembler::emit_##name(DoubleRegister dst, DoubleRegister src) { \
    instruction(dst.D(), src.D());                                             \
    return true;                                                               \
  }
#define I32_SHIFTOP(name, instruction)                              \
  void LiftoffAssembler::emit_##name(Register dst, Register src,    \
                                     Register amount) {             \
    instruction(dst.W(), src.W(), amount.W());                      \
  }                                                                 \
  void LiftoffAssembler::emit_##name##i(Register dst, Register src, \
                                        int32_t amount) {           \
    instruction(dst.W(), src.W(), amount & 31);                     \
  }
#define I64_SHIFTOP(name, instruction)                                         \
  void LiftoffAssembler::emit_##name(LiftoffRegister dst, LiftoffRegister src, \
                                     Register amount) {                        \
    instruction(dst.gp().X(), src.gp().X(), amount.X());                       \
  }                                                                            \
  void LiftoffAssembler::emit_##name##i(LiftoffRegister dst,                   \
                                        LiftoffRegister src, int32_t amount) { \
    instruction(dst.gp().X(), src.gp().X(), amount & 63);                      \
  }

I32_BINOP_I(i32_add, Add)
I32_BINOP(i32_sub, Sub)
I32_BINOP(i32_mul, Mul)
I32_BINOP_I(i32_and, And)
I32_BINOP_I(i32_or, Orr)
I32_BINOP_I(i32_xor, Eor)
I32_SHIFTOP(i32_shl, Lsl)
I32_SHIFTOP(i32_sar, Asr)
I32_SHIFTOP(i32_shr, Lsr)
I64_BINOP_I(i64_add, Add)
I64_BINOP(i64_sub, Sub)
I64_BINOP(i64_mul, Mul)
I64_BINOP_I(i64_and, And)
I64_BINOP_I(i64_or, Orr)
I64_BINOP_I(i64_xor, Eor)
I64_SHIFTOP(i64_shl, Lsl)
I64_SHIFTOP(i64_sar, Asr)
I64_SHIFTOP(i64_shr, Lsr)
FP32_BINOP(f32_add, Fadd)
FP32_BINOP(f32_sub, Fsub)
FP32_BINOP(f32_mul, Fmul)
FP32_BINOP(f32_div, Fdiv)
FP32_BINOP(f32_min, Fmin)
FP32_BINOP(f32_max, Fmax)
FP32_UNOP(f32_abs, Fabs)
FP32_UNOP(f32_neg, Fneg)
FP32_UNOP_RETURN_TRUE(f32_ceil, Frintp)
FP32_UNOP_RETURN_TRUE(f32_floor, Frintm)
FP32_UNOP_RETURN_TRUE(f32_trunc, Frintz)
FP32_UNOP_RETURN_TRUE(f32_nearest_int, Frintn)
FP32_UNOP(f32_sqrt, Fsqrt)
FP64_BINOP(f64_add, Fadd)
FP64_BINOP(f64_sub, Fsub)
FP64_BINOP(f64_mul, Fmul)
FP64_BINOP(f64_div, Fdiv)
FP64_BINOP(f64_min, Fmin)
FP64_BINOP(f64_max, Fmax)
FP64_UNOP(f64_abs, Fabs)
FP64_UNOP(f64_neg, Fneg)
FP64_UNOP_RETURN_TRUE(f64_ceil, Frintp)
FP64_UNOP_RETURN_TRUE(f64_floor, Frintm)
FP64_UNOP_RETURN_TRUE(f64_trunc, Frintz)
FP64_UNOP_RETURN_TRUE(f64_nearest_int, Frintn)
FP64_UNOP(f64_sqrt, Fsqrt)

#undef I32_BINOP_I
#undef I32_BINOP
#undef I64_BINOP_I
#undef I64_BINOP
#undef FP32_BINOP
#undef FP32_UNOP
#undef FP32_UNOP_RETURN_TRUE
#undef FP64_BINOP
#undef FP64_UNOP
#undef FP64_UNOP_RETURN_TRUE
#undef I32_SHIFTOP
#undef I64_SHIFTOP

void LiftoffAssembler::emit_i32_clz(Register dst, Register src) {
  Clz(dst.W(), src.W());
}

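// ARM64 has no ctz instruction: reverse the bits (Rbit), then count leading
// zeros.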
void LiftoffAssembler::emit_i32_ctz(Register dst, Register src) {
  Rbit(dst.W(), src.W());
  Clz(dst.W(), dst.W());
}

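// There is no gp popcnt instruction on ARM64: move the value into a NEON
// register, use Cnt for per-byte popcounts, then Addv to sum the bytes.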
bool LiftoffAssembler::emit_i32_popcnt(Register dst, Register src) {
  UseScratchRegisterScope temps(this);
  VRegister scratch = temps.AcquireV(kFormat8B);
  Fmov(scratch.S(), src.W());
  Cnt(scratch, scratch);
  Addv(scratch.B(), scratch);
  Fmov(dst.W(), scratch.S());
  return true;
}

void LiftoffAssembler::emit_i64_clz(LiftoffRegister dst, LiftoffRegister src) {
  Clz(dst.gp().X(), src.gp().X());
}

void LiftoffAssembler::emit_i64_ctz(LiftoffRegister dst, LiftoffRegister src) {
  Rbit(dst.gp().X(), src.gp().X());
  Clz(dst.gp().X(), dst.gp().X());
}

bool LiftoffAssembler::emit_i64_popcnt(LiftoffRegister dst,
                                       LiftoffRegister src) {
  UseScratchRegisterScope temps(this);
  VRegister scratch = temps.AcquireV(kFormat8B);
  Fmov(scratch.D(), src.gp().X());
  Cnt(scratch, scratch);
  Addv(scratch.B(), scratch);
  Fmov(dst.gp().X(), scratch.D());
  return true;
}

void LiftoffAssembler::emit_i32_divs(Register dst, Register lhs, Register rhs,
                                     Label* trap_div_by_zero,
                                     Label* trap_div_unrepresentable) {
  Register dst_w = dst.W();
  Register lhs_w = lhs.W();
  Register rhs_w = rhs.W();
  bool can_use_dst = !dst_w.Aliases(lhs_w) && !dst_w.Aliases(rhs_w);
  if (can_use_dst) {
    // Do div early.
    Sdiv(dst_w, lhs_w, rhs_w);
  }
  // Check for division by zero.
  Cbz(rhs_w, trap_div_by_zero);
  // Check for kMinInt / -1. This is unrepresentable.
  Cmp(rhs_w, -1);
  Ccmp(lhs_w, 1, NoFlag, eq);
  B(trap_div_unrepresentable, vs);
  if (!can_use_dst) {
    // Do div.
    Sdiv(dst_w, lhs_w, rhs_w);
  }
}

void LiftoffAssembler::emit_i32_divu(Register dst, Register lhs, Register rhs,
                                     Label* trap_div_by_zero) {
  // Check for division by zero.
  Cbz(rhs.W(), trap_div_by_zero);
  // Do div.
  Udiv(dst.W(), lhs.W(), rhs.W());
}

void LiftoffAssembler::emit_i32_rems(Register dst, Register lhs, Register rhs,
                                     Label* trap_div_by_zero) {
  Register dst_w = dst.W();
  Register lhs_w = lhs.W();
  Register rhs_w = rhs.W();
  // Do early div.
  // No need to check kMinInt / -1: Sdiv returns kMinInt, kMinInt * -1 wraps
  // back to kMinInt, and the Msub result is therefore 0, as required.
  UseScratchRegisterScope temps(this);
  Register scratch = temps.AcquireW();
  Sdiv(scratch, lhs_w, rhs_w);
  // Check for division by zero.
  Cbz(rhs_w, trap_div_by_zero);
  // Compute remainder.
  Msub(dst_w, scratch, rhs_w, lhs_w);
}

void LiftoffAssembler::emit_i32_remu(Register dst, Register lhs, Register rhs,
                                     Label* trap_div_by_zero) {
  Register dst_w = dst.W();
  Register lhs_w = lhs.W();
  Register rhs_w = rhs.W();
  // Do early div.
  UseScratchRegisterScope temps(this);
  Register scratch = temps.AcquireW();
  Udiv(scratch, lhs_w, rhs_w);
  // Check for division by zero.
  Cbz(rhs_w, trap_div_by_zero);
  // Compute remainder.
  Msub(dst_w, scratch, rhs_w, lhs_w);
}

bool LiftoffAssembler::emit_i64_divs(LiftoffRegister dst, LiftoffRegister lhs,
                                     LiftoffRegister rhs,
                                     Label* trap_div_by_zero,
                                     Label* trap_div_unrepresentable) {
  Register dst_x = dst.gp().X();
  Register lhs_x = lhs.gp().X();
  Register rhs_x = rhs.gp().X();
  bool can_use_dst = !dst_x.Aliases(lhs_x) && !dst_x.Aliases(rhs_x);
  if (can_use_dst) {
    // Do div early.
    Sdiv(dst_x, lhs_x, rhs_x);
  }
  // Check for division by zero.
  Cbz(rhs_x, trap_div_by_zero);
  // Check for kMinInt / -1. This is unrepresentable.
  Cmp(rhs_x, -1);
  Ccmp(lhs_x, 1, NoFlag, eq);
  B(trap_div_unrepresentable, vs);
  if (!can_use_dst) {
    // Do div.
    Sdiv(dst_x, lhs_x, rhs_x);
  }
  return true;
}

bool LiftoffAssembler::emit_i64_divu(LiftoffRegister dst, LiftoffRegister lhs,
                                     LiftoffRegister rhs,
                                     Label* trap_div_by_zero) {
  // Check for division by zero.
  Cbz(rhs.gp().X(), trap_div_by_zero);
  // Do div.
  Udiv(dst.gp().X(), lhs.gp().X(), rhs.gp().X());
  return true;
}

bool LiftoffAssembler::emit_i64_rems(LiftoffRegister dst, LiftoffRegister lhs,
                                     LiftoffRegister rhs,
                                     Label* trap_div_by_zero) {
  Register dst_x = dst.gp().X();
  Register lhs_x = lhs.gp().X();
  Register rhs_x = rhs.gp().X();
  // Do early div.
  // No need to check kMinInt / -1: Sdiv returns kMinInt, kMinInt * -1 wraps
  // back to kMinInt, and the Msub result is therefore 0, as required.
  UseScratchRegisterScope temps(this);
  Register scratch = temps.AcquireX();
  Sdiv(scratch, lhs_x, rhs_x);
  // Check for division by zero.
  Cbz(rhs_x, trap_div_by_zero);
  // Compute remainder.
  Msub(dst_x, scratch, rhs_x, lhs_x);
  return true;
}

bool LiftoffAssembler::emit_i64_remu(LiftoffRegister dst, LiftoffRegister lhs,
                                     LiftoffRegister rhs,
                                     Label* trap_div_by_zero) {
  Register dst_x = dst.gp().X();
  Register lhs_x = lhs.gp().X();
  Register rhs_x = rhs.gp().X();
  // Do early div.
  UseScratchRegisterScope temps(this);
  Register scratch = temps.AcquireX();
  Udiv(scratch, lhs_x, rhs_x);
  // Check for division by zero.
  Cbz(rhs_x, trap_div_by_zero);
  // Compute remainder.
  Msub(dst_x, scratch, rhs_x, lhs_x);
  return true;
}

void LiftoffAssembler::emit_u32_to_intptr(Register dst, Register src) {
  Uxtw(dst, src);
}

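// Copysign via NEON: shift the sign bit of {rhs} down to bit 0 with Ushr,
// then shift-left-and-insert (Sli) it back into the sign-bit position of
// {dst}, leaving the other bits of {lhs} intact.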
void LiftoffAssembler::emit_f32_copysign(DoubleRegister dst, DoubleRegister lhs,
                                         DoubleRegister rhs) {
  UseScratchRegisterScope temps(this);
  DoubleRegister scratch = temps.AcquireD();
  Ushr(scratch.V2S(), rhs.V2S(), 31);
  if (dst != lhs) {
    Fmov(dst.S(), lhs.S());
  }
  Sli(dst.V2S(), scratch.V2S(), 31);
}

void LiftoffAssembler::emit_f64_copysign(DoubleRegister dst, DoubleRegister lhs,
                                         DoubleRegister rhs) {
  UseScratchRegisterScope temps(this);
  DoubleRegister scratch = temps.AcquireD();
  Ushr(scratch.V1D(), rhs.V1D(), 63);
  if (dst != lhs) {
    Fmov(dst.D(), lhs.D());
  }
  Sli(dst.V1D(), scratch.V1D(), 63);
}

bool LiftoffAssembler::emit_type_conversion(WasmOpcode opcode,
                                            LiftoffRegister dst,
                                            LiftoffRegister src, Label* trap) {
  switch (opcode) {
    case kExprI32ConvertI64:
      if (src != dst) Mov(dst.gp().W(), src.gp().W());
      return true;
    case kExprI32SConvertF32:
      Fcvtzs(dst.gp().W(), src.fp().S());  // f32 -> i32 round to zero.
      // Check underflow and NaN.
      Fcmp(src.fp().S(), static_cast<float>(INT32_MIN));
      // Check overflow.
      Ccmp(dst.gp().W(), -1, VFlag, ge);
      B(trap, vs);
      return true;
    case kExprI32UConvertF32:
      Fcvtzu(dst.gp().W(), src.fp().S());  // f32 -> i32 round to zero.
      // Check underflow and NaN.
      Fcmp(src.fp().S(), -1.0);
      // Check overflow.
      Ccmp(dst.gp().W(), -1, ZFlag, gt);
      B(trap, eq);
      return true;
    case kExprI32SConvertF64: {
      // INT32_MIN and INT32_MAX are valid results, so we cannot test the
      // result to detect overflow. We could have done two immediate floating
      // point comparisons, but that would have generated two conditional
      // branches.
      UseScratchRegisterScope temps(this);
      VRegister fp_ref = temps.AcquireD();
      VRegister fp_cmp = temps.AcquireD();
      Fcvtzs(dst.gp().W(), src.fp().D());  // f64 -> i32 round to zero.
      Frintz(fp_ref, src.fp().D());        // f64 -> f64 round to zero.
      Scvtf(fp_cmp, dst.gp().W());         // i32 -> f64.
      // If comparison fails, we have an overflow or a NaN.
      Fcmp(fp_cmp, fp_ref);
      B(trap, ne);
      return true;
    }
    case kExprI32UConvertF64: {
      // UINT32_MAX is a valid result, so we cannot test the result to detect
      // overflow. We could have done two immediate floating point comparisons,
      // but that would have generated two conditional branches.
      UseScratchRegisterScope temps(this);
      VRegister fp_ref = temps.AcquireD();
      VRegister fp_cmp = temps.AcquireD();
      Fcvtzu(dst.gp().W(), src.fp().D());  // f64 -> i32 round to zero.
      Frintz(fp_ref, src.fp().D());        // f64 -> f64 round to zero.
      Ucvtf(fp_cmp, dst.gp().W());         // i32 -> f64.
      // If comparison fails, we have an overflow or a NaN.
      Fcmp(fp_cmp, fp_ref);
      B(trap, ne);
      return true;
    }
    case kExprI32SConvertSatF32:
      Fcvtzs(dst.gp().W(), src.fp().S());
      return true;
    case kExprI32UConvertSatF32:
      Fcvtzu(dst.gp().W(), src.fp().S());
      return true;
    case kExprI32SConvertSatF64:
      Fcvtzs(dst.gp().W(), src.fp().D());
      return true;
    case kExprI32UConvertSatF64:
      Fcvtzu(dst.gp().W(), src.fp().D());
      return true;
    case kExprI64SConvertSatF32:
      Fcvtzs(dst.gp().X(), src.fp().S());
      return true;
    case kExprI64UConvertSatF32:
      Fcvtzu(dst.gp().X(), src.fp().S());
      return true;
    case kExprI64SConvertSatF64:
      Fcvtzs(dst.gp().X(), src.fp().D());
      return true;
    case kExprI64UConvertSatF64:
      Fcvtzu(dst.gp().X(), src.fp().D());
      return true;
    case kExprI32ReinterpretF32:
      Fmov(dst.gp().W(), src.fp().S());
      return true;
    case kExprI64SConvertI32:
      Sxtw(dst.gp().X(), src.gp().W());
      return true;
    case kExprI64SConvertF32:
      Fcvtzs(dst.gp().X(), src.fp().S());  // f32 -> i64 round to zero.
      // Check underflow and NaN.
      Fcmp(src.fp().S(), static_cast<float>(INT64_MIN));
      // Check overflow.
      Ccmp(dst.gp().X(), -1, VFlag, ge);
      B(trap, vs);
      return true;
    case kExprI64UConvertF32:
      Fcvtzu(dst.gp().X(), src.fp().S());  // f32 -> i64 round to zero.
      // Check underflow and NaN.
      Fcmp(src.fp().S(), -1.0);
      // Check overflow.
      Ccmp(dst.gp().X(), -1, ZFlag, gt);
      B(trap, eq);
      return true;
    case kExprI64SConvertF64:
      Fcvtzs(dst.gp().X(), src.fp().D());  // f64 -> i64 round to zero.
      // Check underflow and NaN.
      Fcmp(src.fp().D(), static_cast<double>(INT64_MIN));
      // Check overflow.
      Ccmp(dst.gp().X(), -1, VFlag, ge);
      B(trap, vs);
      return true;
    case kExprI64UConvertF64:
      Fcvtzu(dst.gp().X(), src.fp().D());  // f64 -> i64 round to zero.
      // Check underflow and NaN.
      Fcmp(src.fp().D(), -1.0);
      // Check overflow.
      Ccmp(dst.gp().X(), -1, ZFlag, gt);
      B(trap, eq);
      return true;
    case kExprI64UConvertI32:
      Mov(dst.gp().W(), src.gp().W());
      return true;
    case kExprI64ReinterpretF64:
      Fmov(dst.gp().X(), src.fp().D());
      return true;
    case kExprF32SConvertI32:
      Scvtf(dst.fp().S(), src.gp().W());
      return true;
    case kExprF32UConvertI32:
      Ucvtf(dst.fp().S(), src.gp().W());
      return true;
    case kExprF32SConvertI64:
      Scvtf(dst.fp().S(), src.gp().X());
      return true;
    case kExprF32UConvertI64:
      Ucvtf(dst.fp().S(), src.gp().X());
      return true;
    case kExprF32ConvertF64:
      Fcvt(dst.fp().S(), src.fp().D());
      return true;
    case kExprF32ReinterpretI32:
      Fmov(dst.fp().S(), src.gp().W());
      return true;
    case kExprF64SConvertI32:
      Scvtf(dst.fp().D(), src.gp().W());
      return true;
    case kExprF64UConvertI32:
      Ucvtf(dst.fp().D(), src.gp().W());
      return true;
    case kExprF64SConvertI64:
      Scvtf(dst.fp().D(), src.gp().X());
      return true;
    case kExprF64UConvertI64:
      Ucvtf(dst.fp().D(), src.gp().X());
      return true;
    case kExprF64ConvertF32:
      Fcvt(dst.fp().D(), src.fp().S());
      return true;
    case kExprF64ReinterpretI64:
      Fmov(dst.fp().D(), src.gp().X());
      return true;
    default:
      UNREACHABLE();
  }
}

void LiftoffAssembler::emit_i32_signextend_i8(Register dst, Register src) {
  sxtb(dst, src);
}

void LiftoffAssembler::emit_i32_signextend_i16(Register dst, Register src) {
  sxth(dst, src);
}

void LiftoffAssembler::emit_i64_signextend_i8(LiftoffRegister dst,
                                              LiftoffRegister src) {
  sxtb(dst.gp(), src.gp());
}

void LiftoffAssembler::emit_i64_signextend_i16(LiftoffRegister dst,
                                               LiftoffRegister src) {
  sxth(dst.gp(), src.gp());
}

void LiftoffAssembler::emit_i64_signextend_i32(LiftoffRegister dst,
                                               LiftoffRegister src) {
  sxtw(dst.gp(), src.gp());
}

void LiftoffAssembler::emit_jump(Label* label) { B(label); }

void LiftoffAssembler::emit_jump(Register target) { Br(target); }

void LiftoffAssembler::emit_cond_jump(Condition cond, Label* label,
                                      ValueType type, Register lhs,
                                      Register rhs) {
  switch (type.kind()) {
    case ValueType::kI32:
      if (rhs.is_valid()) {
        Cmp(lhs.W(), rhs.W());
      } else {
        Cmp(lhs.W(), wzr);
      }
      break;
    case ValueType::kI64:
      if (rhs.is_valid()) {
        Cmp(lhs.X(), rhs.X());
      } else {
        Cmp(lhs.X(), xzr);
      }
      break;
    default:
      UNREACHABLE();
  }
  B(label, cond);
}

void LiftoffAssembler::emit_i32_eqz(Register dst, Register src) {
  Cmp(src.W(), wzr);
  Cset(dst.W(), eq);
}

void LiftoffAssembler::emit_i32_set_cond(Condition cond, Register dst,
                                         Register lhs, Register rhs) {
  Cmp(lhs.W(), rhs.W());
  Cset(dst.W(), cond);
}

void LiftoffAssembler::emit_i64_eqz(Register dst, LiftoffRegister src) {
  Cmp(src.gp().X(), xzr);
  Cset(dst.W(), eq);
}

void LiftoffAssembler::emit_i64_set_cond(Condition cond, Register dst,
                                         LiftoffRegister lhs,
                                         LiftoffRegister rhs) {
  Cmp(lhs.gp().X(), rhs.gp().X());
  Cset(dst.W(), cond);
}

void LiftoffAssembler::emit_f32_set_cond(Condition cond, Register dst,
                                         DoubleRegister lhs,
                                         DoubleRegister rhs) {
  Fcmp(lhs.S(), rhs.S());
  Cset(dst.W(), cond);
  if (cond != ne) {
    // If the V flag is set, at least one of the arguments was a NaN -> false.
    Csel(dst.W(), wzr, dst.W(), vs);
  }
}

void LiftoffAssembler::emit_f64_set_cond(Condition cond, Register dst,
                                         DoubleRegister lhs,
                                         DoubleRegister rhs) {
  Fcmp(lhs.D(), rhs.D());
  Cset(dst.W(), cond);
  if (cond != ne) {
    // If the V flag is set, at least one of the arguments was a NaN -> false.
    Csel(dst.W(), wzr, dst.W(), vs);
  }
}

bool LiftoffAssembler::emit_select(LiftoffRegister dst, Register condition,
                                   LiftoffRegister true_value,
                                   LiftoffRegister false_value,
                                   ValueType type) {
  return false;
}

void LiftoffAssembler::LoadTransform(LiftoffRegister dst, Register src_addr,
                                     Register offset_reg, uint32_t offset_imm,
                                     LoadType type,
                                     LoadTransformationKind transform,
                                     uint32_t* protected_load_pc) {
  UseScratchRegisterScope temps(this);
  MemOperand src_op =
      liftoff::GetMemOp(this, &temps, src_addr, offset_reg, offset_imm);
  *protected_load_pc = pc_offset();
  MachineType memtype = type.mem_type();

  if (transform == LoadTransformationKind::kExtend) {
    if (memtype == MachineType::Int8()) {
      Ldr(dst.fp().D(), src_op);
      Sxtl(dst.fp().V8H(), dst.fp().V8B());
    } else if (memtype == MachineType::Uint8()) {
      Ldr(dst.fp().D(), src_op);
      Uxtl(dst.fp().V8H(), dst.fp().V8B());
    } else if (memtype == MachineType::Int16()) {
      Ldr(dst.fp().D(), src_op);
      Sxtl(dst.fp().V4S(), dst.fp().V4H());
    } else if (memtype == MachineType::Uint16()) {
      Ldr(dst.fp().D(), src_op);
      Uxtl(dst.fp().V4S(), dst.fp().V4H());
    } else if (memtype == MachineType::Int32()) {
      Ldr(dst.fp().D(), src_op);
      Sxtl(dst.fp().V2D(), dst.fp().V2S());
    } else if (memtype == MachineType::Uint32()) {
      Ldr(dst.fp().D(), src_op);
      Uxtl(dst.fp().V2D(), dst.fp().V2S());
    }
  } else if (transform == LoadTransformationKind::kZeroExtend) {
    if (memtype == MachineType::Int32()) {
      Ldr(dst.fp().S(), src_op);
    } else {
      DCHECK_EQ(MachineType::Int64(), memtype);
      Ldr(dst.fp().D(), src_op);
    }
  } else {
    // ld1r only allows no offset or post-index, so emit an add.
    DCHECK_EQ(LoadTransformationKind::kSplat, transform);
    if (src_op.IsRegisterOffset()) {
      // We have 2 tmp gps, so it's okay to acquire 1 more here, and it
      // actually doesn't matter if we acquire the same one.
      Register tmp = temps.AcquireX();
      Add(tmp, src_op.base(), src_op.regoffset().X());
      src_op = MemOperand(tmp.X(), 0);
    } else if (src_op.IsImmediateOffset() && src_op.offset() != 0) {
      Register tmp = temps.AcquireX();
      Add(tmp, src_op.base(), src_op.offset());
      src_op = MemOperand(tmp.X(), 0);
    }

    if (memtype == MachineType::Int8()) {
      ld1r(dst.fp().V16B(), src_op);
    } else if (memtype == MachineType::Int16()) {
      ld1r(dst.fp().V8H(), src_op);
    } else if (memtype == MachineType::Int32()) {
      ld1r(dst.fp().V4S(), src_op);
    } else if (memtype == MachineType::Int64()) {
      ld1r(dst.fp().V2D(), src_op);
    }
  }
}

void LiftoffAssembler::emit_i8x16_swizzle(LiftoffRegister dst,
                                          LiftoffRegister lhs,
                                          LiftoffRegister rhs) {
  Tbl(dst.fp().V16B(), lhs.fp().V16B(), rhs.fp().V16B());
}

void LiftoffAssembler::emit_f64x2_splat(LiftoffRegister dst,
                                        LiftoffRegister src) {
  Dup(dst.fp().V2D(), src.fp().D(), 0);
}

void LiftoffAssembler::emit_f64x2_extract_lane(LiftoffRegister dst,
                                               LiftoffRegister lhs,
                                               uint8_t imm_lane_idx) {
  Mov(dst.fp().D(), lhs.fp().V2D(), imm_lane_idx);
}

void LiftoffAssembler::emit_f64x2_replace_lane(LiftoffRegister dst,
                                               LiftoffRegister src1,
                                               LiftoffRegister src2,
                                               uint8_t imm_lane_idx) {
  if (dst != src1) {
    Mov(dst.fp().V2D(), src1.fp().V2D());
  }
  Mov(dst.fp().V2D(), imm_lane_idx, src2.fp().V2D(), 0);
}

void LiftoffAssembler::emit_f64x2_abs(LiftoffRegister dst,
                                      LiftoffRegister src) {
  Fabs(dst.fp().V2D(), src.fp().V2D());
}

void LiftoffAssembler::emit_f64x2_neg(LiftoffRegister dst,
                                      LiftoffRegister src) {
  Fneg(dst.fp().V2D(), src.fp().V2D());
}

void LiftoffAssembler::emit_f64x2_sqrt(LiftoffRegister dst,
                                       LiftoffRegister src) {
  Fsqrt(dst.fp().V2D(), src.fp().V2D());
}

bool LiftoffAssembler::emit_f64x2_ceil(LiftoffRegister dst,
                                       LiftoffRegister src) {
  Frintp(dst.fp().V2D(), src.fp().V2D());
  return true;
}

bool LiftoffAssembler::emit_f64x2_floor(LiftoffRegister dst,
                                        LiftoffRegister src) {
  Frintm(dst.fp().V2D(), src.fp().V2D());
  return true;
}

bool LiftoffAssembler::emit_f64x2_trunc(LiftoffRegister dst,
                                        LiftoffRegister src) {
  Frintz(dst.fp().V2D(), src.fp().V2D());
  return true;
}

bool LiftoffAssembler::emit_f64x2_nearest_int(LiftoffRegister dst,
                                              LiftoffRegister src) {
  Frintn(dst.fp().V2D(), src.fp().V2D());
  return true;
}

void LiftoffAssembler::emit_f64x2_add(LiftoffRegister dst, LiftoffRegister lhs,
                                      LiftoffRegister rhs) {
  Fadd(dst.fp().V2D(), lhs.fp().V2D(), rhs.fp().V2D());
}

void LiftoffAssembler::emit_f64x2_sub(LiftoffRegister dst, LiftoffRegister lhs,
                                      LiftoffRegister rhs) {
  Fsub(dst.fp().V2D(), lhs.fp().V2D(), rhs.fp().V2D());
}

void LiftoffAssembler::emit_f64x2_mul(LiftoffRegister dst, LiftoffRegister lhs,
                                      LiftoffRegister rhs) {
  Fmul(dst.fp().V2D(), lhs.fp().V2D(), rhs.fp().V2D());
}

void LiftoffAssembler::emit_f64x2_div(LiftoffRegister dst, LiftoffRegister lhs,
                                      LiftoffRegister rhs) {
  Fdiv(dst.fp().V2D(), lhs.fp().V2D(), rhs.fp().V2D());
}

void LiftoffAssembler::emit_f64x2_min(LiftoffRegister dst, LiftoffRegister lhs,
1629 LiftoffRegister rhs) {
1630 Fmin(dst.fp().V2D(), lhs.fp().V2D(), rhs.fp().V2D());
1631 }
1632
emit_f64x2_max(LiftoffRegister dst,LiftoffRegister lhs,LiftoffRegister rhs)1633 void LiftoffAssembler::emit_f64x2_max(LiftoffRegister dst, LiftoffRegister lhs,
1634 LiftoffRegister rhs) {
1635 Fmax(dst.fp().V2D(), lhs.fp().V2D(), rhs.fp().V2D());
1636 }
1637
emit_f64x2_pmin(LiftoffRegister dst,LiftoffRegister lhs,LiftoffRegister rhs)1638 void LiftoffAssembler::emit_f64x2_pmin(LiftoffRegister dst, LiftoffRegister lhs,
1639 LiftoffRegister rhs) {
1640 UseScratchRegisterScope temps(this);
1641
1642 VRegister tmp = dst.fp();
1643 if (dst == lhs || dst == rhs) {
1644 tmp = temps.AcquireV(kFormat2D);
1645 }
1646
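  // Wasm pmin is "lhs > rhs ? rhs : lhs": compare lhs > rhs, then use the
  // all-ones lanes of the comparison to bit-select rhs over lhs. (pmax below
  // is symmetric, with the comparison operands swapped.)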
  Fcmgt(tmp.V2D(), lhs.fp().V2D(), rhs.fp().V2D());
  Bsl(tmp.V16B(), rhs.fp().V16B(), lhs.fp().V16B());

  if (dst == lhs || dst == rhs) {
    Mov(dst.fp().V2D(), tmp);
  }
}

void LiftoffAssembler::emit_f64x2_pmax(LiftoffRegister dst, LiftoffRegister lhs,
                                       LiftoffRegister rhs) {
  UseScratchRegisterScope temps(this);

  VRegister tmp = dst.fp();
  if (dst == lhs || dst == rhs) {
    tmp = temps.AcquireV(kFormat2D);
  }

  Fcmgt(tmp.V2D(), rhs.fp().V2D(), lhs.fp().V2D());
  Bsl(tmp.V16B(), rhs.fp().V16B(), lhs.fp().V16B());

  if (dst == lhs || dst == rhs) {
    Mov(dst.fp().V2D(), tmp);
  }
}

void LiftoffAssembler::emit_f32x4_splat(LiftoffRegister dst,
                                        LiftoffRegister src) {
  Dup(dst.fp().V4S(), src.fp().S(), 0);
}

void LiftoffAssembler::emit_f32x4_extract_lane(LiftoffRegister dst,
                                               LiftoffRegister lhs,
                                               uint8_t imm_lane_idx) {
  Mov(dst.fp().S(), lhs.fp().V4S(), imm_lane_idx);
}

void LiftoffAssembler::emit_f32x4_replace_lane(LiftoffRegister dst,
                                               LiftoffRegister src1,
                                               LiftoffRegister src2,
                                               uint8_t imm_lane_idx) {
  if (dst != src1) {
    Mov(dst.fp().V4S(), src1.fp().V4S());
  }
  Mov(dst.fp().V4S(), imm_lane_idx, src2.fp().V4S(), 0);
}

void LiftoffAssembler::emit_f32x4_abs(LiftoffRegister dst,
                                      LiftoffRegister src) {
  Fabs(dst.fp().V4S(), src.fp().V4S());
}

void LiftoffAssembler::emit_f32x4_neg(LiftoffRegister dst,
                                      LiftoffRegister src) {
  Fneg(dst.fp().V4S(), src.fp().V4S());
}

void LiftoffAssembler::emit_f32x4_sqrt(LiftoffRegister dst,
                                       LiftoffRegister src) {
  Fsqrt(dst.fp().V4S(), src.fp().V4S());
}

bool LiftoffAssembler::emit_f32x4_ceil(LiftoffRegister dst,
                                       LiftoffRegister src) {
  Frintp(dst.fp().V4S(), src.fp().V4S());
  return true;
}

bool LiftoffAssembler::emit_f32x4_floor(LiftoffRegister dst,
                                        LiftoffRegister src) {
  Frintm(dst.fp().V4S(), src.fp().V4S());
  return true;
}

bool LiftoffAssembler::emit_f32x4_trunc(LiftoffRegister dst,
                                        LiftoffRegister src) {
  Frintz(dst.fp().V4S(), src.fp().V4S());
  return true;
}

bool LiftoffAssembler::emit_f32x4_nearest_int(LiftoffRegister dst,
                                              LiftoffRegister src) {
  Frintn(dst.fp().V4S(), src.fp().V4S());
  return true;
}

void LiftoffAssembler::emit_f32x4_add(LiftoffRegister dst, LiftoffRegister lhs,
                                      LiftoffRegister rhs) {
  Fadd(dst.fp().V4S(), lhs.fp().V4S(), rhs.fp().V4S());
}

void LiftoffAssembler::emit_f32x4_sub(LiftoffRegister dst, LiftoffRegister lhs,
                                      LiftoffRegister rhs) {
  Fsub(dst.fp().V4S(), lhs.fp().V4S(), rhs.fp().V4S());
}

void LiftoffAssembler::emit_f32x4_mul(LiftoffRegister dst, LiftoffRegister lhs,
                                      LiftoffRegister rhs) {
  Fmul(dst.fp().V4S(), lhs.fp().V4S(), rhs.fp().V4S());
}

void LiftoffAssembler::emit_f32x4_div(LiftoffRegister dst, LiftoffRegister lhs,
                                      LiftoffRegister rhs) {
  Fdiv(dst.fp().V4S(), lhs.fp().V4S(), rhs.fp().V4S());
}

void LiftoffAssembler::emit_f32x4_min(LiftoffRegister dst, LiftoffRegister lhs,
                                      LiftoffRegister rhs) {
  Fmin(dst.fp().V4S(), lhs.fp().V4S(), rhs.fp().V4S());
}

void LiftoffAssembler::emit_f32x4_max(LiftoffRegister dst, LiftoffRegister lhs,
                                      LiftoffRegister rhs) {
  Fmax(dst.fp().V4S(), lhs.fp().V4S(), rhs.fp().V4S());
}

void LiftoffAssembler::emit_f32x4_pmin(LiftoffRegister dst, LiftoffRegister lhs,
                                       LiftoffRegister rhs) {
  UseScratchRegisterScope temps(this);

  VRegister tmp = dst.fp();
  if (dst == lhs || dst == rhs) {
    tmp = temps.AcquireV(kFormat4S);
  }

  Fcmgt(tmp.V4S(), lhs.fp().V4S(), rhs.fp().V4S());
  Bsl(tmp.V16B(), rhs.fp().V16B(), lhs.fp().V16B());

  if (dst == lhs || dst == rhs) {
    Mov(dst.fp().V4S(), tmp);
  }
}

void LiftoffAssembler::emit_f32x4_pmax(LiftoffRegister dst, LiftoffRegister lhs,
                                       LiftoffRegister rhs) {
  UseScratchRegisterScope temps(this);

  VRegister tmp = dst.fp();
  if (dst == lhs || dst == rhs) {
    tmp = temps.AcquireV(kFormat4S);
  }

  Fcmgt(tmp.V4S(), rhs.fp().V4S(), lhs.fp().V4S());
  Bsl(tmp.V16B(), rhs.fp().V16B(), lhs.fp().V16B());

  if (dst == lhs || dst == rhs) {
    Mov(dst.fp().V4S(), tmp);
  }
}

void LiftoffAssembler::emit_i64x2_splat(LiftoffRegister dst,
                                        LiftoffRegister src) {
  Dup(dst.fp().V2D(), src.gp().X());
}

void LiftoffAssembler::emit_i64x2_extract_lane(LiftoffRegister dst,
                                               LiftoffRegister lhs,
                                               uint8_t imm_lane_idx) {
  Mov(dst.gp().X(), lhs.fp().V2D(), imm_lane_idx);
}

void LiftoffAssembler::emit_i64x2_replace_lane(LiftoffRegister dst,
                                               LiftoffRegister src1,
                                               LiftoffRegister src2,
                                               uint8_t imm_lane_idx) {
  if (dst != src1) {
    Mov(dst.fp().V2D(), src1.fp().V2D());
  }
  Mov(dst.fp().V2D(), imm_lane_idx, src2.gp().X());
}

void LiftoffAssembler::emit_i64x2_neg(LiftoffRegister dst,
                                      LiftoffRegister src) {
  Neg(dst.fp().V2D(), src.fp().V2D());
}

void LiftoffAssembler::emit_i64x2_shl(LiftoffRegister dst, LiftoffRegister lhs,
                                      LiftoffRegister rhs) {
  liftoff::EmitSimdShift<liftoff::ShiftDirection::kLeft>(
      this, dst.fp().V2D(), lhs.fp().V2D(), rhs.gp(), kFormat2D);
}

void LiftoffAssembler::emit_i64x2_shli(LiftoffRegister dst, LiftoffRegister lhs,
                                       int32_t rhs) {
  Shl(dst.fp().V2D(), lhs.fp().V2D(), rhs & 63);
}

void LiftoffAssembler::emit_i64x2_shr_s(LiftoffRegister dst,
                                        LiftoffRegister lhs,
                                        LiftoffRegister rhs) {
  liftoff::EmitSimdShift<liftoff::ShiftDirection::kRight,
                         liftoff::ShiftSign::kSigned>(
      this, dst.fp().V2D(), lhs.fp().V2D(), rhs.gp(), kFormat2D);
}

void LiftoffAssembler::emit_i64x2_shri_s(LiftoffRegister dst,
                                         LiftoffRegister lhs, int32_t rhs) {
  liftoff::EmitSimdShiftRightImmediate<kFormat2D, liftoff::ShiftSign::kSigned>(
      this, dst.fp().V2D(), lhs.fp().V2D(), rhs);
}

void LiftoffAssembler::emit_i64x2_shr_u(LiftoffRegister dst,
                                        LiftoffRegister lhs,
                                        LiftoffRegister rhs) {
  liftoff::EmitSimdShift<liftoff::ShiftDirection::kRight,
                         liftoff::ShiftSign::kUnsigned>(
      this, dst.fp().V2D(), lhs.fp().V2D(), rhs.gp(), kFormat2D);
}

void LiftoffAssembler::emit_i64x2_shri_u(LiftoffRegister dst,
                                         LiftoffRegister lhs, int32_t rhs) {
  liftoff::EmitSimdShiftRightImmediate<kFormat2D,
                                       liftoff::ShiftSign::kUnsigned>(
      this, dst.fp().V2D(), lhs.fp().V2D(), rhs);
}

void LiftoffAssembler::emit_i64x2_add(LiftoffRegister dst, LiftoffRegister lhs,
                                      LiftoffRegister rhs) {
  Add(dst.fp().V2D(), lhs.fp().V2D(), rhs.fp().V2D());
}

void LiftoffAssembler::emit_i64x2_sub(LiftoffRegister dst, LiftoffRegister lhs,
                                      LiftoffRegister rhs) {
  Sub(dst.fp().V2D(), lhs.fp().V2D(), rhs.fp().V2D());
}

void LiftoffAssembler::emit_i64x2_mul(LiftoffRegister dst, LiftoffRegister lhs,
                                      LiftoffRegister rhs) {
  UseScratchRegisterScope temps(this);
  VRegister tmp1 = temps.AcquireV(kFormat2D);
  VRegister tmp2 = temps.AcquireV(kFormat2D);

  // Algorithm copied from code-generator-arm64.cc with minor modifications:
  // - 2 temporaries instead of 3 (Liftoff has at most 2 scratch registers),
  // - 1 more Umull instruction to calculate | cg | ae |,
  // - so we can no longer use Umlal in the last step, and use Add instead.
  // Refer to the comments there for details.
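  // A sketch of the math: write each 64-bit lane of lhs as a + 2^32 * b and
  // of rhs as c + 2^32 * d (a, c are the low halves). Then
  //   lhs * rhs = ac + 2^32 * (ad + bc) + 2^64 * bd,
  // and the 2^64 * bd term overflows the lane and is dropped. Umull computes
  // ac; Rev64/Mul/Addp compute ad + bc; Shll shifts it up by 32 bits.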
  Xtn(tmp1.V2S(), lhs.fp().V2D());
  Xtn(tmp2.V2S(), rhs.fp().V2D());
  Umull(tmp1.V2D(), tmp1.V2S(), tmp2.V2S());
  Rev64(tmp2.V4S(), rhs.fp().V4S());
  Mul(tmp2.V4S(), tmp2.V4S(), lhs.fp().V4S());
  Addp(tmp2.V4S(), tmp2.V4S(), tmp2.V4S());
  Shll(dst.fp().V2D(), tmp2.V2S(), 32);
  Add(dst.fp().V2D(), dst.fp().V2D(), tmp1.V2D());
}

void LiftoffAssembler::emit_i32x4_splat(LiftoffRegister dst,
                                        LiftoffRegister src) {
  Dup(dst.fp().V4S(), src.gp().W());
}

void LiftoffAssembler::emit_i32x4_extract_lane(LiftoffRegister dst,
                                               LiftoffRegister lhs,
                                               uint8_t imm_lane_idx) {
  Mov(dst.gp().W(), lhs.fp().V4S(), imm_lane_idx);
}

void LiftoffAssembler::emit_i32x4_replace_lane(LiftoffRegister dst,
                                               LiftoffRegister src1,
                                               LiftoffRegister src2,
                                               uint8_t imm_lane_idx) {
  if (dst != src1) {
    Mov(dst.fp().V4S(), src1.fp().V4S());
  }
  Mov(dst.fp().V4S(), imm_lane_idx, src2.gp().W());
}

void LiftoffAssembler::emit_i32x4_neg(LiftoffRegister dst,
                                      LiftoffRegister src) {
  Neg(dst.fp().V4S(), src.fp().V4S());
}

void LiftoffAssembler::emit_v32x4_anytrue(LiftoffRegister dst,
                                          LiftoffRegister src) {
  liftoff::EmitAnyTrue(this, dst, src);
}

void LiftoffAssembler::emit_v32x4_alltrue(LiftoffRegister dst,
                                          LiftoffRegister src) {
  liftoff::EmitAllTrue(this, dst, src, kFormat4S);
}

void LiftoffAssembler::emit_i32x4_bitmask(LiftoffRegister dst,
                                          LiftoffRegister src) {
  UseScratchRegisterScope temps(this);
  VRegister tmp = temps.AcquireQ();
  VRegister mask = temps.AcquireQ();

  Sshr(tmp.V4S(), src.fp().V4S(), 31);
  // Set the i-th bit of lane i. After the And below, lanes whose sign bit was
  // set keep their i-th bit; all other lanes become 0.
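  // For example, src lanes {neg, pos, pos, neg} leave tmp as {1, 0, 0, 8}
  // after the And, and the horizontal Addv then yields 0b1001.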
  Movi(mask.V2D(), 0x0000'0008'0000'0004, 0x0000'0002'0000'0001);
  And(tmp.V16B(), mask.V16B(), tmp.V16B());
  Addv(tmp.S(), tmp.V4S());
  Mov(dst.gp().W(), tmp.V4S(), 0);
}

void LiftoffAssembler::emit_i32x4_shl(LiftoffRegister dst, LiftoffRegister lhs,
                                      LiftoffRegister rhs) {
  liftoff::EmitSimdShift<liftoff::ShiftDirection::kLeft>(
      this, dst.fp().V4S(), lhs.fp().V4S(), rhs.gp(), kFormat4S);
}

void LiftoffAssembler::emit_i32x4_shli(LiftoffRegister dst, LiftoffRegister lhs,
                                       int32_t rhs) {
  Shl(dst.fp().V4S(), lhs.fp().V4S(), rhs & 31);
}

void LiftoffAssembler::emit_i32x4_shr_s(LiftoffRegister dst,
                                        LiftoffRegister lhs,
                                        LiftoffRegister rhs) {
  liftoff::EmitSimdShift<liftoff::ShiftDirection::kRight,
                         liftoff::ShiftSign::kSigned>(
      this, dst.fp().V4S(), lhs.fp().V4S(), rhs.gp(), kFormat4S);
}

void LiftoffAssembler::emit_i32x4_shri_s(LiftoffRegister dst,
                                         LiftoffRegister lhs, int32_t rhs) {
  liftoff::EmitSimdShiftRightImmediate<kFormat4S, liftoff::ShiftSign::kSigned>(
      this, dst.fp().V4S(), lhs.fp().V4S(), rhs);
}

void LiftoffAssembler::emit_i32x4_shr_u(LiftoffRegister dst,
                                        LiftoffRegister lhs,
                                        LiftoffRegister rhs) {
  liftoff::EmitSimdShift<liftoff::ShiftDirection::kRight,
                         liftoff::ShiftSign::kUnsigned>(
      this, dst.fp().V4S(), lhs.fp().V4S(), rhs.gp(), kFormat4S);
}

void LiftoffAssembler::emit_i32x4_shri_u(LiftoffRegister dst,
                                         LiftoffRegister lhs, int32_t rhs) {
  liftoff::EmitSimdShiftRightImmediate<kFormat4S,
                                       liftoff::ShiftSign::kUnsigned>(
      this, dst.fp().V4S(), lhs.fp().V4S(), rhs);
}

void LiftoffAssembler::emit_i32x4_add(LiftoffRegister dst, LiftoffRegister lhs,
                                      LiftoffRegister rhs) {
  Add(dst.fp().V4S(), lhs.fp().V4S(), rhs.fp().V4S());
}

void LiftoffAssembler::emit_i32x4_sub(LiftoffRegister dst, LiftoffRegister lhs,
                                      LiftoffRegister rhs) {
  Sub(dst.fp().V4S(), lhs.fp().V4S(), rhs.fp().V4S());
}

void LiftoffAssembler::emit_i32x4_mul(LiftoffRegister dst, LiftoffRegister lhs,
                                      LiftoffRegister rhs) {
  Mul(dst.fp().V4S(), lhs.fp().V4S(), rhs.fp().V4S());
}

void LiftoffAssembler::emit_i32x4_min_s(LiftoffRegister dst,
                                        LiftoffRegister lhs,
                                        LiftoffRegister rhs) {
  Smin(dst.fp().V4S(), lhs.fp().V4S(), rhs.fp().V4S());
}

void LiftoffAssembler::emit_i32x4_min_u(LiftoffRegister dst,
                                        LiftoffRegister lhs,
                                        LiftoffRegister rhs) {
  Umin(dst.fp().V4S(), lhs.fp().V4S(), rhs.fp().V4S());
}

void LiftoffAssembler::emit_i32x4_max_s(LiftoffRegister dst,
                                        LiftoffRegister lhs,
                                        LiftoffRegister rhs) {
  Smax(dst.fp().V4S(), lhs.fp().V4S(), rhs.fp().V4S());
}

void LiftoffAssembler::emit_i32x4_max_u(LiftoffRegister dst,
                                        LiftoffRegister lhs,
                                        LiftoffRegister rhs) {
  Umax(dst.fp().V4S(), lhs.fp().V4S(), rhs.fp().V4S());
}

void LiftoffAssembler::emit_i32x4_dot_i16x8_s(LiftoffRegister dst,
                                              LiftoffRegister lhs,
                                              LiftoffRegister rhs) {
  UseScratchRegisterScope scope(this);
  VRegister tmp1 = scope.AcquireV(kFormat4S);
  VRegister tmp2 = scope.AcquireV(kFormat4S);
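  // Widen-multiply the low and high halves, then pairwise-add adjacent
  // products: 32-bit lane i ends up as lhs[2i]*rhs[2i] + lhs[2i+1]*rhs[2i+1].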
  Smull(tmp1, lhs.fp().V4H(), rhs.fp().V4H());
  Smull2(tmp2, lhs.fp().V8H(), rhs.fp().V8H());
  Addp(dst.fp().V4S(), tmp1, tmp2);
}

void LiftoffAssembler::emit_i16x8_splat(LiftoffRegister dst,
                                        LiftoffRegister src) {
  Dup(dst.fp().V8H(), src.gp().W());
}

void LiftoffAssembler::emit_i16x8_extract_lane_u(LiftoffRegister dst,
                                                 LiftoffRegister lhs,
                                                 uint8_t imm_lane_idx) {
  Umov(dst.gp().W(), lhs.fp().V8H(), imm_lane_idx);
}

void LiftoffAssembler::emit_i16x8_extract_lane_s(LiftoffRegister dst,
                                                 LiftoffRegister lhs,
                                                 uint8_t imm_lane_idx) {
  Smov(dst.gp().W(), lhs.fp().V8H(), imm_lane_idx);
}

void LiftoffAssembler::emit_i16x8_replace_lane(LiftoffRegister dst,
                                               LiftoffRegister src1,
                                               LiftoffRegister src2,
                                               uint8_t imm_lane_idx) {
  if (dst != src1) {
    Mov(dst.fp().V8H(), src1.fp().V8H());
  }
  Mov(dst.fp().V8H(), imm_lane_idx, src2.gp().W());
}

void LiftoffAssembler::emit_i16x8_neg(LiftoffRegister dst,
                                      LiftoffRegister src) {
  Neg(dst.fp().V8H(), src.fp().V8H());
}

void LiftoffAssembler::emit_v16x8_anytrue(LiftoffRegister dst,
                                          LiftoffRegister src) {
  liftoff::EmitAnyTrue(this, dst, src);
}

void LiftoffAssembler::emit_v16x8_alltrue(LiftoffRegister dst,
                                          LiftoffRegister src) {
  liftoff::EmitAllTrue(this, dst, src, kFormat8H);
}

void LiftoffAssembler::emit_i16x8_bitmask(LiftoffRegister dst,
                                          LiftoffRegister src) {
  UseScratchRegisterScope temps(this);
  VRegister tmp = temps.AcquireQ();
  VRegister mask = temps.AcquireQ();

  Sshr(tmp.V8H(), src.fp().V8H(), 15);
  // Set the i-th bit of lane i. After the And below, lanes whose sign bit was
  // set keep their i-th bit; all other lanes become 0.
  Movi(mask.V2D(), 0x0080'0040'0020'0010, 0x0008'0004'0002'0001);
  And(tmp.V16B(), mask.V16B(), tmp.V16B());
  Addv(tmp.H(), tmp.V8H());
  Mov(dst.gp().W(), tmp.V8H(), 0);
}

void LiftoffAssembler::emit_i16x8_shl(LiftoffRegister dst, LiftoffRegister lhs,
                                      LiftoffRegister rhs) {
  liftoff::EmitSimdShift<liftoff::ShiftDirection::kLeft>(
      this, dst.fp().V8H(), lhs.fp().V8H(), rhs.gp(), kFormat8H);
}

void LiftoffAssembler::emit_i16x8_shli(LiftoffRegister dst, LiftoffRegister lhs,
                                       int32_t rhs) {
  Shl(dst.fp().V8H(), lhs.fp().V8H(), rhs & 15);
}

void LiftoffAssembler::emit_i16x8_shr_s(LiftoffRegister dst,
                                        LiftoffRegister lhs,
                                        LiftoffRegister rhs) {
  liftoff::EmitSimdShift<liftoff::ShiftDirection::kRight,
                         liftoff::ShiftSign::kSigned>(
      this, dst.fp().V8H(), lhs.fp().V8H(), rhs.gp(), kFormat8H);
}

void LiftoffAssembler::emit_i16x8_shri_s(LiftoffRegister dst,
                                         LiftoffRegister lhs, int32_t rhs) {
  liftoff::EmitSimdShiftRightImmediate<kFormat8H, liftoff::ShiftSign::kSigned>(
      this, dst.fp().V8H(), lhs.fp().V8H(), rhs);
}

void LiftoffAssembler::emit_i16x8_shr_u(LiftoffRegister dst,
                                        LiftoffRegister lhs,
                                        LiftoffRegister rhs) {
  liftoff::EmitSimdShift<liftoff::ShiftDirection::kRight,
                         liftoff::ShiftSign::kUnsigned>(
      this, dst.fp().V8H(), lhs.fp().V8H(), rhs.gp(), kFormat8H);
}

void LiftoffAssembler::emit_i16x8_shri_u(LiftoffRegister dst,
                                         LiftoffRegister lhs, int32_t rhs) {
  liftoff::EmitSimdShiftRightImmediate<kFormat8H,
                                       liftoff::ShiftSign::kUnsigned>(
      this, dst.fp().V8H(), lhs.fp().V8H(), rhs);
}

void LiftoffAssembler::emit_i16x8_add(LiftoffRegister dst, LiftoffRegister lhs,
                                      LiftoffRegister rhs) {
  Add(dst.fp().V8H(), lhs.fp().V8H(), rhs.fp().V8H());
}

void LiftoffAssembler::emit_i16x8_add_sat_s(LiftoffRegister dst,
                                            LiftoffRegister lhs,
                                            LiftoffRegister rhs) {
  Sqadd(dst.fp().V8H(), lhs.fp().V8H(), rhs.fp().V8H());
}

void LiftoffAssembler::emit_i16x8_sub(LiftoffRegister dst, LiftoffRegister lhs,
                                      LiftoffRegister rhs) {
  Sub(dst.fp().V8H(), lhs.fp().V8H(), rhs.fp().V8H());
}

void LiftoffAssembler::emit_i16x8_sub_sat_s(LiftoffRegister dst,
                                            LiftoffRegister lhs,
                                            LiftoffRegister rhs) {
  Sqsub(dst.fp().V8H(), lhs.fp().V8H(), rhs.fp().V8H());
}

void LiftoffAssembler::emit_i16x8_sub_sat_u(LiftoffRegister dst,
                                            LiftoffRegister lhs,
                                            LiftoffRegister rhs) {
  Uqsub(dst.fp().V8H(), lhs.fp().V8H(), rhs.fp().V8H());
}

void LiftoffAssembler::emit_i16x8_mul(LiftoffRegister dst, LiftoffRegister lhs,
                                      LiftoffRegister rhs) {
  Mul(dst.fp().V8H(), lhs.fp().V8H(), rhs.fp().V8H());
}

void LiftoffAssembler::emit_i16x8_add_sat_u(LiftoffRegister dst,
                                            LiftoffRegister lhs,
                                            LiftoffRegister rhs) {
  Uqadd(dst.fp().V8H(), lhs.fp().V8H(), rhs.fp().V8H());
}

void LiftoffAssembler::emit_i16x8_min_s(LiftoffRegister dst,
                                        LiftoffRegister lhs,
                                        LiftoffRegister rhs) {
  Smin(dst.fp().V8H(), lhs.fp().V8H(), rhs.fp().V8H());
}

void LiftoffAssembler::emit_i16x8_min_u(LiftoffRegister dst,
                                        LiftoffRegister lhs,
                                        LiftoffRegister rhs) {
  Umin(dst.fp().V8H(), lhs.fp().V8H(), rhs.fp().V8H());
}

void LiftoffAssembler::emit_i16x8_max_s(LiftoffRegister dst,
                                        LiftoffRegister lhs,
                                        LiftoffRegister rhs) {
  Smax(dst.fp().V8H(), lhs.fp().V8H(), rhs.fp().V8H());
}

void LiftoffAssembler::emit_i16x8_max_u(LiftoffRegister dst,
                                        LiftoffRegister lhs,
                                        LiftoffRegister rhs) {
  Umax(dst.fp().V8H(), lhs.fp().V8H(), rhs.fp().V8H());
}

void LiftoffAssembler::emit_i8x16_shuffle(LiftoffRegister dst,
                                          LiftoffRegister lhs,
                                          LiftoffRegister rhs,
                                          const uint8_t shuffle[16],
                                          bool is_swizzle) {
  VRegister src1 = lhs.fp();
  VRegister src2 = rhs.fp();
  VRegister temp = dst.fp();
  if (dst == lhs || dst == rhs) {
    // dst overlaps with lhs or rhs, so we need a temporary.
    temp = GetUnusedRegister(kFpReg, LiftoffRegList::ForRegs(lhs, rhs)).fp();
  }

  UseScratchRegisterScope scope(this);

  if (src1 != src2 && !AreConsecutive(src1, src2)) {
    // Tbl needs consecutive registers, which our scratch registers are.
    src1 = scope.AcquireV(kFormat16B);
    src2 = scope.AcquireV(kFormat16B);
    DCHECK(AreConsecutive(src1, src2));
    Mov(src1.Q(), lhs.fp().Q());
    Mov(src2.Q(), rhs.fp().Q());
  }

  int64_t imms[2] = {0, 0};
  for (int i = 7; i >= 0; i--) {
    imms[0] = (imms[0] << 8) | (shuffle[i]);
    imms[1] = (imms[1] << 8) | (shuffle[i + 8]);
  }
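  // Each immediate byte is a lane index for Tbl: at most 0x1F for the
  // two-register form below, or 0x0F when both inputs are the same register.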
  DCHECK_EQ(0, (imms[0] | imms[1]) &
                   (lhs == rhs ? 0xF0F0F0F0F0F0F0F0 : 0xE0E0E0E0E0E0E0E0));

  Movi(temp.V16B(), imms[1], imms[0]);

  if (src1 == src2) {
    Tbl(dst.fp().V16B(), src1.V16B(), temp.V16B());
  } else {
    Tbl(dst.fp().V16B(), src1.V16B(), src2.V16B(), temp.V16B());
  }
}

void LiftoffAssembler::emit_i8x16_splat(LiftoffRegister dst,
                                        LiftoffRegister src) {
  Dup(dst.fp().V16B(), src.gp().W());
}

void LiftoffAssembler::emit_i8x16_extract_lane_u(LiftoffRegister dst,
                                                 LiftoffRegister lhs,
                                                 uint8_t imm_lane_idx) {
  Umov(dst.gp().W(), lhs.fp().V16B(), imm_lane_idx);
}

void LiftoffAssembler::emit_i8x16_extract_lane_s(LiftoffRegister dst,
                                                 LiftoffRegister lhs,
                                                 uint8_t imm_lane_idx) {
  Smov(dst.gp().W(), lhs.fp().V16B(), imm_lane_idx);
}

void LiftoffAssembler::emit_i8x16_replace_lane(LiftoffRegister dst,
                                               LiftoffRegister src1,
                                               LiftoffRegister src2,
                                               uint8_t imm_lane_idx) {
  if (dst != src1) {
    Mov(dst.fp().V16B(), src1.fp().V16B());
  }
  Mov(dst.fp().V16B(), imm_lane_idx, src2.gp().W());
}

void LiftoffAssembler::emit_i8x16_neg(LiftoffRegister dst,
                                      LiftoffRegister src) {
  Neg(dst.fp().V16B(), src.fp().V16B());
}

void LiftoffAssembler::emit_v8x16_anytrue(LiftoffRegister dst,
                                          LiftoffRegister src) {
  liftoff::EmitAnyTrue(this, dst, src);
}

void LiftoffAssembler::emit_v8x16_alltrue(LiftoffRegister dst,
                                          LiftoffRegister src) {
  liftoff::EmitAllTrue(this, dst, src, kFormat16B);
}

void LiftoffAssembler::emit_i8x16_bitmask(LiftoffRegister dst,
                                          LiftoffRegister src) {
  UseScratchRegisterScope temps(this);
  VRegister tmp = temps.AcquireQ();
  VRegister mask = temps.AcquireQ();

  // Set bit (i mod 8) of each byte lane i. After the And below, lanes whose
  // sign bit was set keep that bit; all other lanes become 0.
  Sshr(tmp.V16B(), src.fp().V16B(), 7);
  Movi(mask.V2D(), 0x8040'2010'0804'0201);
  And(tmp.V16B(), mask.V16B(), tmp.V16B());
  Ext(mask.V16B(), tmp.V16B(), tmp.V16B(), 8);
  Zip1(tmp.V16B(), tmp.V16B(), mask.V16B());
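  // After Ext and Zip1, 16-bit lane i holds byte i in its low half and byte
  // i + 8 in its high half, so the horizontal Addv collects all 16 mask bits.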
  Addv(tmp.H(), tmp.V8H());
  Mov(dst.gp().W(), tmp.V8H(), 0);
}

void LiftoffAssembler::emit_i8x16_shl(LiftoffRegister dst, LiftoffRegister lhs,
                                      LiftoffRegister rhs) {
  liftoff::EmitSimdShift<liftoff::ShiftDirection::kLeft>(
      this, dst.fp().V16B(), lhs.fp().V16B(), rhs.gp(), kFormat16B);
}

void LiftoffAssembler::emit_i8x16_shli(LiftoffRegister dst, LiftoffRegister lhs,
                                       int32_t rhs) {
  Shl(dst.fp().V16B(), lhs.fp().V16B(), rhs & 7);
}

void LiftoffAssembler::emit_i8x16_shr_s(LiftoffRegister dst,
                                        LiftoffRegister lhs,
                                        LiftoffRegister rhs) {
  liftoff::EmitSimdShift<liftoff::ShiftDirection::kRight,
                         liftoff::ShiftSign::kSigned>(
      this, dst.fp().V16B(), lhs.fp().V16B(), rhs.gp(), kFormat16B);
}

void LiftoffAssembler::emit_i8x16_shri_s(LiftoffRegister dst,
                                         LiftoffRegister lhs, int32_t rhs) {
  liftoff::EmitSimdShiftRightImmediate<kFormat16B, liftoff::ShiftSign::kSigned>(
      this, dst.fp().V16B(), lhs.fp().V16B(), rhs);
}

void LiftoffAssembler::emit_i8x16_shr_u(LiftoffRegister dst,
                                        LiftoffRegister lhs,
                                        LiftoffRegister rhs) {
  liftoff::EmitSimdShift<liftoff::ShiftDirection::kRight,
                         liftoff::ShiftSign::kUnsigned>(
      this, dst.fp().V16B(), lhs.fp().V16B(), rhs.gp(), kFormat16B);
}

void LiftoffAssembler::emit_i8x16_shri_u(LiftoffRegister dst,
                                         LiftoffRegister lhs, int32_t rhs) {
  liftoff::EmitSimdShiftRightImmediate<kFormat16B,
                                       liftoff::ShiftSign::kUnsigned>(
      this, dst.fp().V16B(), lhs.fp().V16B(), rhs);
}

void LiftoffAssembler::emit_i8x16_add(LiftoffRegister dst, LiftoffRegister lhs,
                                      LiftoffRegister rhs) {
  Add(dst.fp().V16B(), lhs.fp().V16B(), rhs.fp().V16B());
}

void LiftoffAssembler::emit_i8x16_add_sat_s(LiftoffRegister dst,
                                            LiftoffRegister lhs,
                                            LiftoffRegister rhs) {
  Sqadd(dst.fp().V16B(), lhs.fp().V16B(), rhs.fp().V16B());
}

void LiftoffAssembler::emit_i8x16_sub(LiftoffRegister dst, LiftoffRegister lhs,
                                      LiftoffRegister rhs) {
  Sub(dst.fp().V16B(), lhs.fp().V16B(), rhs.fp().V16B());
}

void LiftoffAssembler::emit_i8x16_sub_sat_s(LiftoffRegister dst,
                                            LiftoffRegister lhs,
                                            LiftoffRegister rhs) {
  Sqsub(dst.fp().V16B(), lhs.fp().V16B(), rhs.fp().V16B());
}

void LiftoffAssembler::emit_i8x16_sub_sat_u(LiftoffRegister dst,
                                            LiftoffRegister lhs,
                                            LiftoffRegister rhs) {
  Uqsub(dst.fp().V16B(), lhs.fp().V16B(), rhs.fp().V16B());
}

void LiftoffAssembler::emit_i8x16_mul(LiftoffRegister dst, LiftoffRegister lhs,
                                      LiftoffRegister rhs) {
  Mul(dst.fp().V16B(), lhs.fp().V16B(), rhs.fp().V16B());
}

void LiftoffAssembler::emit_i8x16_add_sat_u(LiftoffRegister dst,
                                            LiftoffRegister lhs,
                                            LiftoffRegister rhs) {
  Uqadd(dst.fp().V16B(), lhs.fp().V16B(), rhs.fp().V16B());
}

void LiftoffAssembler::emit_i8x16_min_s(LiftoffRegister dst,
                                        LiftoffRegister lhs,
                                        LiftoffRegister rhs) {
  Smin(dst.fp().V16B(), lhs.fp().V16B(), rhs.fp().V16B());
}

void LiftoffAssembler::emit_i8x16_min_u(LiftoffRegister dst,
                                        LiftoffRegister lhs,
                                        LiftoffRegister rhs) {
  Umin(dst.fp().V16B(), lhs.fp().V16B(), rhs.fp().V16B());
}

void LiftoffAssembler::emit_i8x16_max_s(LiftoffRegister dst,
                                        LiftoffRegister lhs,
                                        LiftoffRegister rhs) {
  Smax(dst.fp().V16B(), lhs.fp().V16B(), rhs.fp().V16B());
}

void LiftoffAssembler::emit_i8x16_max_u(LiftoffRegister dst,
                                        LiftoffRegister lhs,
                                        LiftoffRegister rhs) {
  Umax(dst.fp().V16B(), lhs.fp().V16B(), rhs.fp().V16B());
}

void LiftoffAssembler::emit_i8x16_eq(LiftoffRegister dst, LiftoffRegister lhs,
                                     LiftoffRegister rhs) {
  Cmeq(dst.fp().V16B(), lhs.fp().V16B(), rhs.fp().V16B());
}

void LiftoffAssembler::emit_i8x16_ne(LiftoffRegister dst, LiftoffRegister lhs,
                                     LiftoffRegister rhs) {
  Cmeq(dst.fp().V16B(), lhs.fp().V16B(), rhs.fp().V16B());
  Mvn(dst.fp().V16B(), dst.fp().V16B());
}

void LiftoffAssembler::emit_i8x16_gt_s(LiftoffRegister dst, LiftoffRegister lhs,
                                       LiftoffRegister rhs) {
  Cmgt(dst.fp().V16B(), lhs.fp().V16B(), rhs.fp().V16B());
}

void LiftoffAssembler::emit_i8x16_gt_u(LiftoffRegister dst, LiftoffRegister lhs,
                                       LiftoffRegister rhs) {
  Cmhi(dst.fp().V16B(), lhs.fp().V16B(), rhs.fp().V16B());
}

void LiftoffAssembler::emit_i8x16_ge_s(LiftoffRegister dst, LiftoffRegister lhs,
                                       LiftoffRegister rhs) {
  Cmge(dst.fp().V16B(), lhs.fp().V16B(), rhs.fp().V16B());
}

void LiftoffAssembler::emit_i8x16_ge_u(LiftoffRegister dst, LiftoffRegister lhs,
                                       LiftoffRegister rhs) {
  Cmhs(dst.fp().V16B(), lhs.fp().V16B(), rhs.fp().V16B());
}

void LiftoffAssembler::emit_i16x8_eq(LiftoffRegister dst, LiftoffRegister lhs,
                                     LiftoffRegister rhs) {
  Cmeq(dst.fp().V8H(), lhs.fp().V8H(), rhs.fp().V8H());
}

void LiftoffAssembler::emit_i16x8_ne(LiftoffRegister dst, LiftoffRegister lhs,
                                     LiftoffRegister rhs) {
  Cmeq(dst.fp().V8H(), lhs.fp().V8H(), rhs.fp().V8H());
  Mvn(dst.fp().V8H(), dst.fp().V8H());
}

void LiftoffAssembler::emit_i16x8_gt_s(LiftoffRegister dst, LiftoffRegister lhs,
                                       LiftoffRegister rhs) {
  Cmgt(dst.fp().V8H(), lhs.fp().V8H(), rhs.fp().V8H());
}

void LiftoffAssembler::emit_i16x8_gt_u(LiftoffRegister dst, LiftoffRegister lhs,
                                       LiftoffRegister rhs) {
  Cmhi(dst.fp().V8H(), lhs.fp().V8H(), rhs.fp().V8H());
}

void LiftoffAssembler::emit_i16x8_ge_s(LiftoffRegister dst, LiftoffRegister lhs,
                                       LiftoffRegister rhs) {
  Cmge(dst.fp().V8H(), lhs.fp().V8H(), rhs.fp().V8H());
}

void LiftoffAssembler::emit_i16x8_ge_u(LiftoffRegister dst, LiftoffRegister lhs,
                                       LiftoffRegister rhs) {
  Cmhs(dst.fp().V8H(), lhs.fp().V8H(), rhs.fp().V8H());
}

void LiftoffAssembler::emit_i32x4_eq(LiftoffRegister dst, LiftoffRegister lhs,
                                     LiftoffRegister rhs) {
  Cmeq(dst.fp().V4S(), lhs.fp().V4S(), rhs.fp().V4S());
}

void LiftoffAssembler::emit_i32x4_ne(LiftoffRegister dst, LiftoffRegister lhs,
                                     LiftoffRegister rhs) {
  Cmeq(dst.fp().V4S(), lhs.fp().V4S(), rhs.fp().V4S());
  Mvn(dst.fp().V4S(), dst.fp().V4S());
}

void LiftoffAssembler::emit_i32x4_gt_s(LiftoffRegister dst, LiftoffRegister lhs,
                                       LiftoffRegister rhs) {
  Cmgt(dst.fp().V4S(), lhs.fp().V4S(), rhs.fp().V4S());
}

void LiftoffAssembler::emit_i32x4_gt_u(LiftoffRegister dst, LiftoffRegister lhs,
                                       LiftoffRegister rhs) {
  Cmhi(dst.fp().V4S(), lhs.fp().V4S(), rhs.fp().V4S());
}

void LiftoffAssembler::emit_i32x4_ge_s(LiftoffRegister dst, LiftoffRegister lhs,
                                       LiftoffRegister rhs) {
  Cmge(dst.fp().V4S(), lhs.fp().V4S(), rhs.fp().V4S());
}

void LiftoffAssembler::emit_i32x4_ge_u(LiftoffRegister dst, LiftoffRegister lhs,
                                       LiftoffRegister rhs) {
  Cmhs(dst.fp().V4S(), lhs.fp().V4S(), rhs.fp().V4S());
}

void LiftoffAssembler::emit_f32x4_eq(LiftoffRegister dst, LiftoffRegister lhs,
                                     LiftoffRegister rhs) {
  Fcmeq(dst.fp().V4S(), lhs.fp().V4S(), rhs.fp().V4S());
}

void LiftoffAssembler::emit_f32x4_ne(LiftoffRegister dst, LiftoffRegister lhs,
                                     LiftoffRegister rhs) {
  Fcmeq(dst.fp().V4S(), lhs.fp().V4S(), rhs.fp().V4S());
  Mvn(dst.fp().V4S(), dst.fp().V4S());
}

void LiftoffAssembler::emit_f32x4_lt(LiftoffRegister dst, LiftoffRegister lhs,
                                     LiftoffRegister rhs) {
  Fcmgt(dst.fp().V4S(), rhs.fp().V4S(), lhs.fp().V4S());
}

void LiftoffAssembler::emit_f32x4_le(LiftoffRegister dst, LiftoffRegister lhs,
                                     LiftoffRegister rhs) {
  Fcmge(dst.fp().V4S(), rhs.fp().V4S(), lhs.fp().V4S());
}

void LiftoffAssembler::emit_f64x2_eq(LiftoffRegister dst, LiftoffRegister lhs,
                                     LiftoffRegister rhs) {
  Fcmeq(dst.fp().V2D(), lhs.fp().V2D(), rhs.fp().V2D());
}

void LiftoffAssembler::emit_f64x2_ne(LiftoffRegister dst, LiftoffRegister lhs,
                                     LiftoffRegister rhs) {
  Fcmeq(dst.fp().V2D(), lhs.fp().V2D(), rhs.fp().V2D());
  Mvn(dst.fp().V2D(), dst.fp().V2D());
}

void LiftoffAssembler::emit_f64x2_lt(LiftoffRegister dst, LiftoffRegister lhs,
                                     LiftoffRegister rhs) {
  Fcmgt(dst.fp().V2D(), rhs.fp().V2D(), lhs.fp().V2D());
}

void LiftoffAssembler::emit_f64x2_le(LiftoffRegister dst, LiftoffRegister lhs,
                                     LiftoffRegister rhs) {
  Fcmge(dst.fp().V2D(), rhs.fp().V2D(), lhs.fp().V2D());
}

void LiftoffAssembler::emit_s128_const(LiftoffRegister dst,
                                       const uint8_t imms[16]) {
  uint64_t vals[2];
  memcpy(vals, imms, sizeof(vals));
  Movi(dst.fp().V16B(), vals[1], vals[0]);
}

void LiftoffAssembler::emit_s128_not(LiftoffRegister dst, LiftoffRegister src) {
  Mvn(dst.fp().V16B(), src.fp().V16B());
}

void LiftoffAssembler::emit_s128_and(LiftoffRegister dst, LiftoffRegister lhs,
                                     LiftoffRegister rhs) {
  And(dst.fp().V16B(), lhs.fp().V16B(), rhs.fp().V16B());
}

void LiftoffAssembler::emit_s128_or(LiftoffRegister dst, LiftoffRegister lhs,
                                    LiftoffRegister rhs) {
  Orr(dst.fp().V16B(), lhs.fp().V16B(), rhs.fp().V16B());
}

void LiftoffAssembler::emit_s128_xor(LiftoffRegister dst, LiftoffRegister lhs,
                                     LiftoffRegister rhs) {
  Eor(dst.fp().V16B(), lhs.fp().V16B(), rhs.fp().V16B());
}

void LiftoffAssembler::emit_s128_select(LiftoffRegister dst,
                                        LiftoffRegister src1,
                                        LiftoffRegister src2,
                                        LiftoffRegister mask) {
  if (dst != mask) {
    Mov(dst.fp().V16B(), mask.fp().V16B());
  }
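  // Bsl uses dst as the bit mask: bits that are set in dst select the
  // corresponding bit from src1, clear bits select src2. That is why the
  // mask has to be moved into dst first.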
  Bsl(dst.fp().V16B(), src1.fp().V16B(), src2.fp().V16B());
}

void LiftoffAssembler::emit_i32x4_sconvert_f32x4(LiftoffRegister dst,
                                                 LiftoffRegister src) {
  Fcvtzs(dst.fp().V4S(), src.fp().V4S());
}

void LiftoffAssembler::emit_i32x4_uconvert_f32x4(LiftoffRegister dst,
                                                 LiftoffRegister src) {
  Fcvtzu(dst.fp().V4S(), src.fp().V4S());
}

void LiftoffAssembler::emit_f32x4_sconvert_i32x4(LiftoffRegister dst,
                                                 LiftoffRegister src) {
  Scvtf(dst.fp().V4S(), src.fp().V4S());
}

void LiftoffAssembler::emit_f32x4_uconvert_i32x4(LiftoffRegister dst,
                                                 LiftoffRegister src) {
  Ucvtf(dst.fp().V4S(), src.fp().V4S());
}

void LiftoffAssembler::emit_i8x16_sconvert_i16x8(LiftoffRegister dst,
                                                 LiftoffRegister lhs,
                                                 LiftoffRegister rhs) {
  UseScratchRegisterScope temps(this);
  VRegister tmp = temps.AcquireV(kFormat8H);
  VRegister right = rhs.fp().V8H();
  if (dst == rhs) {
    Mov(tmp, right);
    right = tmp;
  }
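  // Narrow lhs into the low half first; the copy saved above keeps Sqxtn2
  // from reading lanes of rhs that the first Sqxtn may have overwritten when
  // dst aliases rhs. The other narrowing conversions below follow the same
  // pattern.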
  Sqxtn(dst.fp().V8B(), lhs.fp().V8H());
  Sqxtn2(dst.fp().V16B(), right);
}

void LiftoffAssembler::emit_i8x16_uconvert_i16x8(LiftoffRegister dst,
                                                 LiftoffRegister lhs,
                                                 LiftoffRegister rhs) {
  UseScratchRegisterScope temps(this);
  VRegister tmp = temps.AcquireV(kFormat8H);
  VRegister right = rhs.fp().V8H();
  if (dst == rhs) {
    Mov(tmp, right);
    right = tmp;
  }
  Sqxtun(dst.fp().V8B(), lhs.fp().V8H());
  Sqxtun2(dst.fp().V16B(), right);
}

void LiftoffAssembler::emit_i16x8_sconvert_i32x4(LiftoffRegister dst,
                                                 LiftoffRegister lhs,
                                                 LiftoffRegister rhs) {
  UseScratchRegisterScope temps(this);
  VRegister tmp = temps.AcquireV(kFormat4S);
  VRegister right = rhs.fp().V4S();
  if (dst == rhs) {
    Mov(tmp, right);
    right = tmp;
  }
  Sqxtn(dst.fp().V4H(), lhs.fp().V4S());
  Sqxtn2(dst.fp().V8H(), right);
}

void LiftoffAssembler::emit_i16x8_uconvert_i32x4(LiftoffRegister dst,
                                                 LiftoffRegister lhs,
                                                 LiftoffRegister rhs) {
  UseScratchRegisterScope temps(this);
  VRegister tmp = temps.AcquireV(kFormat4S);
  VRegister right = rhs.fp().V4S();
  if (dst == rhs) {
    Mov(tmp, right);
    right = tmp;
  }
  Sqxtun(dst.fp().V4H(), lhs.fp().V4S());
  Sqxtun2(dst.fp().V8H(), right);
}

void LiftoffAssembler::emit_i16x8_sconvert_i8x16_low(LiftoffRegister dst,
                                                     LiftoffRegister src) {
  Sxtl(dst.fp().V8H(), src.fp().V8B());
}

void LiftoffAssembler::emit_i16x8_sconvert_i8x16_high(LiftoffRegister dst,
                                                      LiftoffRegister src) {
  Sxtl2(dst.fp().V8H(), src.fp().V16B());
}

void LiftoffAssembler::emit_i16x8_uconvert_i8x16_low(LiftoffRegister dst,
                                                     LiftoffRegister src) {
  Uxtl(dst.fp().V8H(), src.fp().V8B());
}

void LiftoffAssembler::emit_i16x8_uconvert_i8x16_high(LiftoffRegister dst,
                                                      LiftoffRegister src) {
  Uxtl2(dst.fp().V8H(), src.fp().V16B());
}

void LiftoffAssembler::emit_i32x4_sconvert_i16x8_low(LiftoffRegister dst,
                                                     LiftoffRegister src) {
  Sxtl(dst.fp().V4S(), src.fp().V4H());
}

void LiftoffAssembler::emit_i32x4_sconvert_i16x8_high(LiftoffRegister dst,
                                                      LiftoffRegister src) {
  Sxtl2(dst.fp().V4S(), src.fp().V8H());
}

void LiftoffAssembler::emit_i32x4_uconvert_i16x8_low(LiftoffRegister dst,
                                                     LiftoffRegister src) {
  Uxtl(dst.fp().V4S(), src.fp().V4H());
}

void LiftoffAssembler::emit_i32x4_uconvert_i16x8_high(LiftoffRegister dst,
                                                      LiftoffRegister src) {
  Uxtl2(dst.fp().V4S(), src.fp().V8H());
}

void LiftoffAssembler::emit_s128_and_not(LiftoffRegister dst,
                                         LiftoffRegister lhs,
                                         LiftoffRegister rhs) {
  Bic(dst.fp().V16B(), lhs.fp().V16B(), rhs.fp().V16B());
}

void LiftoffAssembler::emit_i8x16_rounding_average_u(LiftoffRegister dst,
                                                     LiftoffRegister lhs,
                                                     LiftoffRegister rhs) {
  Urhadd(dst.fp().V16B(), lhs.fp().V16B(), rhs.fp().V16B());
}

void LiftoffAssembler::emit_i16x8_rounding_average_u(LiftoffRegister dst,
                                                     LiftoffRegister lhs,
                                                     LiftoffRegister rhs) {
  Urhadd(dst.fp().V8H(), lhs.fp().V8H(), rhs.fp().V8H());
}

void LiftoffAssembler::emit_i8x16_abs(LiftoffRegister dst,
                                      LiftoffRegister src) {
  Abs(dst.fp().V16B(), src.fp().V16B());
}

void LiftoffAssembler::emit_i16x8_abs(LiftoffRegister dst,
                                      LiftoffRegister src) {
  Abs(dst.fp().V8H(), src.fp().V8H());
}

void LiftoffAssembler::emit_i32x4_abs(LiftoffRegister dst,
                                      LiftoffRegister src) {
  Abs(dst.fp().V4S(), src.fp().V4S());
}

void LiftoffAssembler::StackCheck(Label* ool_code, Register limit_address) {
  Ldr(limit_address, MemOperand(limit_address));
  Cmp(sp, limit_address);
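  // Branch to the out-of-line code if sp is below or equal to the limit
  // (unsigned "lower or same").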
  B(ool_code, ls);
}

void LiftoffAssembler::CallTrapCallbackForTesting() {
  CallCFunction(ExternalReference::wasm_call_trap_callback_for_testing(), 0);
}

void LiftoffAssembler::AssertUnreachable(AbortReason reason) {
  TurboAssembler::AssertUnreachable(reason);
}

void LiftoffAssembler::PushRegisters(LiftoffRegList regs) {
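  // PadRegList/PadVRegList pad each list to an even register count, so each
  // push below keeps sp 16-byte aligned.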
  PushCPURegList(liftoff::PadRegList(regs.GetGpList()));
  PushCPURegList(liftoff::PadVRegList(regs.GetFpList()));
}

void LiftoffAssembler::PopRegisters(LiftoffRegList regs) {
  PopCPURegList(liftoff::PadVRegList(regs.GetFpList()));
  PopCPURegList(liftoff::PadRegList(regs.GetGpList()));
}

void LiftoffAssembler::DropStackSlotsAndRet(uint32_t num_stack_slots) {
  DropSlots(num_stack_slots);
  Ret();
}

void LiftoffAssembler::CallC(const wasm::FunctionSig* sig,
                             const LiftoffRegister* args,
                             const LiftoffRegister* rets,
                             ValueType out_argument_type, int stack_bytes,
                             ExternalReference ext_ref) {
  // The stack pointer is required to be quadword aligned.
  int total_size = RoundUp(stack_bytes, kQuadWordSizeInBytes);
  // Reserve space on the stack; Claim with unit size 1 takes a byte count.
  Claim(total_size, 1);

  int arg_bytes = 0;
  for (ValueType param_type : sig->parameters()) {
    Poke(liftoff::GetRegFromType(*args++, param_type), arg_bytes);
    arg_bytes += param_type.element_size_bytes();
  }
  DCHECK_LE(arg_bytes, stack_bytes);

  // Pass a pointer to the buffer with the arguments to the C function.
  Mov(x0, sp);

  // Now call the C function.
  constexpr int kNumCCallArgs = 1;
  CallCFunction(ext_ref, kNumCCallArgs);

  // Move return value to the right register.
  const LiftoffRegister* next_result_reg = rets;
  if (sig->return_count() > 0) {
    DCHECK_EQ(1, sig->return_count());
    constexpr Register kReturnReg = x0;
    if (kReturnReg != next_result_reg->gp()) {
      Move(*next_result_reg, LiftoffRegister(kReturnReg), sig->GetReturn(0));
    }
    ++next_result_reg;
  }

  // Load potential output value from the buffer on the stack.
  if (out_argument_type != kWasmStmt) {
    Peek(liftoff::GetRegFromType(*next_result_reg, out_argument_type), 0);
  }

  Drop(total_size, 1);
}

void LiftoffAssembler::CallNativeWasmCode(Address addr) {
  Call(addr, RelocInfo::WASM_CALL);
}

void LiftoffAssembler::TailCallNativeWasmCode(Address addr) {
  Jump(addr, RelocInfo::WASM_CALL);
}

void LiftoffAssembler::CallIndirect(const wasm::FunctionSig* sig,
                                    compiler::CallDescriptor* call_descriptor,
                                    Register target) {
  // For Arm64, we have more cache registers than wasm parameters. That means
  // that target will always be in a register.
  DCHECK(target.is_valid());
  Call(target);
}

void LiftoffAssembler::TailCallIndirect(Register target) {
  DCHECK(target.is_valid());
  // When control flow integrity is enabled, the target is a "bti c"
  // instruction, which enforces that the jump instruction is either a "blr",
  // or a "br" with x16 or x17 as its destination.
  UseScratchRegisterScope temps(this);
  temps.Exclude(x17);
  Mov(x17, target);
  Jump(x17);
}

void LiftoffAssembler::CallRuntimeStub(WasmCode::RuntimeStubId sid) {
  // A direct call to a wasm runtime stub defined in this module.
  // Just encode the stub index. This will be patched at relocation.
  Call(static_cast<Address>(sid), RelocInfo::WASM_STUB_CALL);
}

void LiftoffAssembler::AllocateStackSlot(Register addr, uint32_t size) {
  // The stack pointer is required to be quadword aligned.
  size = RoundUp(size, kQuadWordSizeInBytes);
  Claim(size, 1);
  Mov(addr, sp);
}

void LiftoffAssembler::DeallocateStackSlot(uint32_t size) {
  // The stack pointer is required to be quadword aligned.
  size = RoundUp(size, kQuadWordSizeInBytes);
  Drop(size, 1);
}

void LiftoffStackSlots::Construct() {
  size_t num_slots = 0;
  for (auto& slot : slots_) {
    num_slots += slot.src_.type() == kWasmS128 ? 2 : 1;
  }
  // The stack pointer is required to be quadword aligned.
  asm_->Claim(RoundUp(num_slots, 2));
  size_t poke_offset = num_slots * kXRegSize;
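  // Slots are poked from the top of the claimed area downwards, so the first
  // slot in the list ends up at the highest offset.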
  for (auto& slot : slots_) {
    poke_offset -= slot.src_.type() == kWasmS128 ? kXRegSize * 2 : kXRegSize;
    switch (slot.src_.loc()) {
      case LiftoffAssembler::VarState::kStack: {
        UseScratchRegisterScope temps(asm_);
        CPURegister scratch = liftoff::AcquireByType(&temps, slot.src_.type());
        asm_->Ldr(scratch, liftoff::GetStackSlot(slot.src_offset_));
        asm_->Poke(scratch, poke_offset);
        break;
      }
      case LiftoffAssembler::VarState::kRegister:
        asm_->Poke(liftoff::GetRegFromType(slot.src_.reg(), slot.src_.type()),
                   poke_offset);
        break;
      case LiftoffAssembler::VarState::kIntConst:
        DCHECK(slot.src_.type() == kWasmI32 || slot.src_.type() == kWasmI64);
        if (slot.src_.i32_const() == 0) {
          Register zero_reg = slot.src_.type() == kWasmI32 ? wzr : xzr;
          asm_->Poke(zero_reg, poke_offset);
        } else {
          UseScratchRegisterScope temps(asm_);
          Register scratch = slot.src_.type() == kWasmI32 ? temps.AcquireW()
                                                          : temps.AcquireX();
          asm_->Mov(scratch, int64_t{slot.src_.i32_const()});
          asm_->Poke(scratch, poke_offset);
        }
        break;
    }
  }
}

}  // namespace wasm
}  // namespace internal
}  // namespace v8

#endif  // V8_WASM_BASELINE_ARM64_LIFTOFF_ASSEMBLER_ARM64_H_