// Copyright 2017 the V8 project authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#ifndef V8_WASM_BASELINE_X64_LIFTOFF_ASSEMBLER_X64_H_
#define V8_WASM_BASELINE_X64_LIFTOFF_ASSEMBLER_X64_H_

#include "src/base/platform/wrappers.h"
#include "src/codegen/assembler.h"
#include "src/codegen/cpu-features.h"
#include "src/codegen/machine-type.h"
#include "src/codegen/x64/register-x64.h"
#include "src/heap/memory-chunk.h"
#include "src/wasm/baseline/liftoff-assembler.h"
#include "src/wasm/simd-shuffle.h"
#include "src/wasm/wasm-objects.h"

namespace v8 {
namespace internal {
namespace wasm {

#define RETURN_FALSE_IF_MISSING_CPU_FEATURE(name)    \
  if (!CpuFeatures::IsSupported(name)) return false; \
  CpuFeatureScope feature(this, name);
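
// Typical usage in one of the bool-returning emit_* methods (a sketch;
// {emit_foo} and the chosen feature are just examples):
//
//   bool LiftoffAssembler::emit_foo(...) {
//     RETURN_FALSE_IF_MISSING_CPU_FEATURE(SSE4_1);
//     // ... emit code that requires SSE4.1 ...
//     return true;
//   }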

namespace liftoff {

inline constexpr Condition ToCondition(LiftoffCondition liftoff_cond) {
  switch (liftoff_cond) {
    case kEqual:
      return equal;
    case kUnequal:
      return not_equal;
    case kSignedLessThan:
      return less;
    case kSignedLessEqual:
      return less_equal;
    case kSignedGreaterThan:
      return greater;
    case kSignedGreaterEqual:
      return greater_equal;
    case kUnsignedLessThan:
      return below;
    case kUnsignedLessEqual:
      return below_equal;
    case kUnsignedGreaterThan:
      return above;
    case kUnsignedGreaterEqual:
      return above_equal;
  }
}

constexpr Register kScratchRegister2 = r11;
static_assert(kScratchRegister != kScratchRegister2, "collision");
static_assert((kLiftoffAssemblerGpCacheRegs &
               Register::ListOf(kScratchRegister, kScratchRegister2)) == 0,
              "scratch registers must not be used as cache registers");

constexpr DoubleRegister kScratchDoubleReg2 = xmm14;
static_assert(kScratchDoubleReg != kScratchDoubleReg2, "collision");
static_assert((kLiftoffAssemblerFpCacheRegs &
               DoubleRegister::ListOf(kScratchDoubleReg, kScratchDoubleReg2)) ==
                  0,
              "scratch registers must not be used as cache registers");

// rbp-8 holds the stack marker, rbp-16 is the instance parameter.
constexpr int kInstanceOffset = 16;
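// A sketch of the frame layout (offsets are positive distances below rbp,
// addressed via {GetStackSlot} below):
//   rbp -  8: frame marker
//   rbp - 16: instance parameter (kInstanceOffset)
//   below that: the OSR target slot and Liftoff's spill slots.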

inline Operand GetStackSlot(int offset) { return Operand(rbp, -offset); }

// TODO(clemensb): Make this a constexpr variable once Operand is constexpr.
inline Operand GetInstanceOperand() { return GetStackSlot(kInstanceOffset); }

inline Operand GetOSRTargetSlot() { return GetStackSlot(kOSRTargetOffset); }

inline Operand GetMemOp(LiftoffAssembler* assm, Register addr, Register offset,
                        uintptr_t offset_imm) {
  if (is_uint31(offset_imm)) {
    int32_t offset_imm32 = static_cast<int32_t>(offset_imm);
    return offset == no_reg ? Operand(addr, offset_imm32)
                            : Operand(addr, offset, times_1, offset_imm32);
  }
  // Offset immediate does not fit in 31 bits.
  Register scratch = kScratchRegister;
  assm->TurboAssembler::Move(scratch, offset_imm);
  if (offset != no_reg) assm->addq(scratch, offset);
  return Operand(addr, scratch, times_1, 0);
}
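// Example: {addr=rsi, offset=rdx, offset_imm=0x10} yields the operand
// [rsi + rdx*1 + 0x10]; an immediate that needs more than 31 bits is first
// materialized into kScratchRegister (plus {offset}, if present).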

inline void Load(LiftoffAssembler* assm, LiftoffRegister dst, Operand src,
                 ValueKind kind) {
  switch (kind) {
    case kI32:
      assm->movl(dst.gp(), src);
      break;
    case kI64:
    case kOptRef:
    case kRef:
    case kRtt:
    case kRttWithDepth:
      assm->movq(dst.gp(), src);
      break;
    case kF32:
      assm->Movss(dst.fp(), src);
      break;
    case kF64:
      assm->Movsd(dst.fp(), src);
      break;
    case kS128:
      assm->Movdqu(dst.fp(), src);
      break;
    default:
      UNREACHABLE();
  }
}

inline void Store(LiftoffAssembler* assm, Operand dst, LiftoffRegister src,
                  ValueKind kind) {
  switch (kind) {
    case kI32:
      assm->movl(dst, src.gp());
      break;
    case kI64:
      assm->movq(dst, src.gp());
      break;
    case kOptRef:
    case kRef:
    case kRtt:
    case kRttWithDepth:
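      // References go through {StoreTaggedField}, which (when pointer
      // compression is enabled) stores the compressed 32-bit representation.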
      assm->StoreTaggedField(dst, src.gp());
      break;
    case kF32:
      assm->Movss(dst, src.fp());
      break;
    case kF64:
      assm->Movsd(dst, src.fp());
      break;
    case kS128:
      assm->Movdqu(dst, src.fp());
      break;
    default:
      UNREACHABLE();
  }
}

inline void push(LiftoffAssembler* assm, LiftoffRegister reg, ValueKind kind,
                 int padding = 0) {
  switch (kind) {
    case kI32:
    case kI64:
    case kRef:
    case kOptRef:
      assm->AllocateStackSpace(padding);
      assm->pushq(reg.gp());
      break;
    case kF32:
      assm->AllocateStackSpace(kSystemPointerSize + padding);
      assm->Movss(Operand(rsp, 0), reg.fp());
      break;
    case kF64:
      assm->AllocateStackSpace(kSystemPointerSize + padding);
      assm->Movsd(Operand(rsp, 0), reg.fp());
      break;
    case kS128:
      assm->AllocateStackSpace(kSystemPointerSize * 2 + padding);
      assm->Movdqu(Operand(rsp, 0), reg.fp());
      break;
    default:
      UNREACHABLE();
  }
}

constexpr int kSubSpSize = 7;  // 7 bytes for "subq rsp, <imm32>"
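// (1-byte REX.W prefix + 1-byte opcode + 1-byte ModRM + 4-byte immediate.)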

}  // namespace liftoff

int LiftoffAssembler::PrepareStackFrame() {
  int offset = pc_offset();
  // Next we reserve the memory for the whole stack frame. We do not know yet
  // how big the stack frame will be so we just emit a placeholder instruction.
  // PatchPrepareStackFrame will patch this in order to increase the stack
  // appropriately.
  sub_sp_32(0);
  DCHECK_EQ(liftoff::kSubSpSize, pc_offset() - offset);
  return offset;
}

void LiftoffAssembler::PrepareTailCall(int num_callee_stack_params,
                                       int stack_param_delta) {
  // Push the return address and frame pointer to complete the stack frame.
  pushq(Operand(rbp, 8));
  pushq(Operand(rbp, 0));

  // Shift the whole frame upwards.
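  // {slot_count} covers the callee's stack parameters plus the return
  // address and frame pointer that were just pushed above.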
  const int slot_count = num_callee_stack_params + 2;
  for (int i = slot_count - 1; i >= 0; --i) {
    movq(kScratchRegister, Operand(rsp, i * 8));
    movq(Operand(rbp, (i - stack_param_delta) * 8), kScratchRegister);
  }

  // Set the new stack and frame pointer.
  leaq(rsp, Operand(rbp, -stack_param_delta * 8));
  popq(rbp);
}

void LiftoffAssembler::AlignFrameSize() {
  max_used_spill_offset_ = RoundUp(max_used_spill_offset_, kSystemPointerSize);
}

void LiftoffAssembler::PatchPrepareStackFrame(
    int offset, SafepointTableBuilder* safepoint_table_builder) {
  // The frame_size includes the frame marker and the instance slot. Both are
  // pushed as part of frame construction, so we don't need to allocate memory
  // for them anymore.
  int frame_size = GetTotalFrameSize() - 2 * kSystemPointerSize;
  DCHECK_EQ(0, frame_size % kSystemPointerSize);

  // We cannot run out of space when patching; just pass anything big enough
  // that the assembler does not try to grow the buffer.
  constexpr int kAvailableSpace = 64;
  Assembler patching_assembler(
      AssemblerOptions{},
      ExternalAssemblerBuffer(buffer_start_ + offset, kAvailableSpace));

  if (V8_LIKELY(frame_size < 4 * KB)) {
    // This is the standard case for small frames: just subtract from SP and be
    // done with it.
    patching_assembler.sub_sp_32(frame_size);
    DCHECK_EQ(liftoff::kSubSpSize, patching_assembler.pc_offset());
    return;
  }

  // The frame size is bigger than 4KB, so we might overflow the available stack
  // space if we first allocate the frame and then do the stack check (we will
  // need some remaining stack space for throwing the exception). That's why we
  // check the available stack space before we allocate the frame. To do this we
  // replace the {__ sub(sp, framesize)} with a jump to OOL code that does this
  // "extended stack check".
  //
  // The OOL code can simply be generated here with the normal assembler,
  // because all other code generation, including OOL code, has already finished
  // when {PatchPrepareStackFrame} is called. The function prologue then jumps
  // to the current {pc_offset()} to execute the OOL code for allocating the
  // large frame.

  // Emit the unconditional branch in the function prologue (from {offset} to
  // {pc_offset()}).
  patching_assembler.jmp_rel(pc_offset() - offset);
  DCHECK_GE(liftoff::kSubSpSize, patching_assembler.pc_offset());
  patching_assembler.Nop(liftoff::kSubSpSize - patching_assembler.pc_offset());

  // If the frame is bigger than the stack, we throw the stack overflow
  // exception unconditionally. Thereby we can avoid the integer overflow
  // check in the condition code.
  RecordComment("OOL: stack check for large frame");
  Label continuation;
  if (frame_size < FLAG_stack_size * 1024) {
    movq(kScratchRegister,
         FieldOperand(kWasmInstanceRegister,
                      WasmInstanceObject::kRealStackLimitAddressOffset));
    movq(kScratchRegister, Operand(kScratchRegister, 0));
    addq(kScratchRegister, Immediate(frame_size));
    cmpq(rsp, kScratchRegister);
    j(above_equal, &continuation, Label::kNear);
  }

  near_call(wasm::WasmCode::kWasmStackOverflow, RelocInfo::WASM_STUB_CALL);
  // The call will not return; just define an empty safepoint.
  safepoint_table_builder->DefineSafepoint(this);
  AssertUnreachable(AbortReason::kUnexpectedReturnFromWasmTrap);

  bind(&continuation);

  // Now allocate the stack space. Note that this might do more than just
  // decrementing the SP; consult {TurboAssembler::AllocateStackSpace}.
  AllocateStackSpace(frame_size);

  // Jump back to the start of the function, from {pc_offset()} to
  // right after the reserved space for the {__ sub(sp, sp, framesize)} (which
  // is a branch now).
  int func_start_offset = offset + liftoff::kSubSpSize;
  jmp_rel(func_start_offset - pc_offset());
}

void LiftoffAssembler::FinishCode() {}

void LiftoffAssembler::AbortCompilation() {}

// static
constexpr int LiftoffAssembler::StaticStackFrameSize() {
  return kOSRTargetOffset;
}

int LiftoffAssembler::SlotSizeForType(ValueKind kind) {
  return is_reference(kind) ? kSystemPointerSize : element_size_bytes(kind);
}

bool LiftoffAssembler::NeedsAlignment(ValueKind kind) {
  return is_reference(kind);
}

void LiftoffAssembler::LoadConstant(LiftoffRegister reg, WasmValue value,
                                    RelocInfo::Mode rmode) {
  switch (value.type().kind()) {
    case kI32:
      if (value.to_i32() == 0 && RelocInfo::IsNone(rmode)) {
        xorl(reg.gp(), reg.gp());
      } else {
        movl(reg.gp(), Immediate(value.to_i32(), rmode));
      }
      break;
    case kI64:
      if (RelocInfo::IsNone(rmode)) {
        TurboAssembler::Move(reg.gp(), value.to_i64());
      } else {
        movq(reg.gp(), Immediate64(value.to_i64(), rmode));
      }
      break;
    case kF32:
      TurboAssembler::Move(reg.fp(), value.to_f32_boxed().get_bits());
      break;
    case kF64:
      TurboAssembler::Move(reg.fp(), value.to_f64_boxed().get_bits());
      break;
    default:
      UNREACHABLE();
  }
}

void LiftoffAssembler::LoadInstanceFromFrame(Register dst) {
  movq(dst, liftoff::GetInstanceOperand());
}

void LiftoffAssembler::LoadFromInstance(Register dst, Register instance,
                                        int offset, int size) {
  DCHECK_LE(0, offset);
  Operand src{instance, offset};
  switch (size) {
    case 1:
      movzxbl(dst, src);
      break;
    case 4:
      movl(dst, src);
      break;
    case 8:
      movq(dst, src);
      break;
    default:
      UNIMPLEMENTED();
  }
}

void LiftoffAssembler::LoadTaggedPointerFromInstance(Register dst,
                                                     Register instance,
                                                     int offset) {
  DCHECK_LE(0, offset);
  LoadTaggedPointerField(dst, Operand(instance, offset));
}

void LiftoffAssembler::LoadExternalPointer(Register dst, Register instance,
                                           int offset, ExternalPointerTag tag,
                                           Register isolate_root) {
  LoadExternalPointerField(dst, FieldOperand(instance, offset), tag,
                           isolate_root,
                           IsolateRootLocation::kInScratchRegister);
}

void LiftoffAssembler::SpillInstance(Register instance) {
  movq(liftoff::GetInstanceOperand(), instance);
}

void LiftoffAssembler::ResetOSRTarget() {
  movq(liftoff::GetOSRTargetSlot(), Immediate(0));
}

void LiftoffAssembler::FillInstanceInto(Register dst) {
  movq(dst, liftoff::GetInstanceOperand());
}

void LiftoffAssembler::LoadTaggedPointer(Register dst, Register src_addr,
                                         Register offset_reg,
                                         int32_t offset_imm,
                                         LiftoffRegList pinned) {
  DCHECK_GE(offset_imm, 0);
  if (FLAG_debug_code && offset_reg != no_reg) {
    AssertZeroExtended(offset_reg);
  }
  Operand src_op = liftoff::GetMemOp(this, src_addr, offset_reg,
                                     static_cast<uint32_t>(offset_imm));
  LoadTaggedPointerField(dst, src_op);
}

void LiftoffAssembler::LoadFullPointer(Register dst, Register src_addr,
                                       int32_t offset_imm) {
  Operand src_op = liftoff::GetMemOp(this, src_addr, no_reg,
                                     static_cast<uint32_t>(offset_imm));
  movq(dst, src_op);
}

void LiftoffAssembler::StoreTaggedPointer(Register dst_addr,
                                          Register offset_reg,
                                          int32_t offset_imm,
                                          LiftoffRegister src,
                                          LiftoffRegList pinned,
                                          SkipWriteBarrier skip_write_barrier) {
  DCHECK_GE(offset_imm, 0);
  Operand dst_op = liftoff::GetMemOp(this, dst_addr, offset_reg,
                                     static_cast<uint32_t>(offset_imm));
  StoreTaggedField(dst_op, src.gp());

  if (skip_write_barrier || FLAG_disable_write_barriers) return;

  Register scratch = pinned.set(GetUnusedRegister(kGpReg, pinned)).gp();
  Label write_barrier;
  Label exit;
  CheckPageFlag(dst_addr, scratch,
                MemoryChunk::kPointersFromHereAreInterestingMask, not_zero,
                &write_barrier, Label::kNear);
  jmp(&exit, Label::kNear);
  bind(&write_barrier);
  JumpIfSmi(src.gp(), &exit, Label::kNear);
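  // The page-flag check below operates on the full pointer, so decompress
  // first when pointer compression is in use.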
  if (COMPRESS_POINTERS_BOOL) {
    DecompressTaggedPointer(src.gp(), src.gp());
  }
  CheckPageFlag(src.gp(), scratch,
                MemoryChunk::kPointersToHereAreInterestingMask, zero, &exit,
                Label::kNear);
  leaq(scratch, dst_op);

  CallRecordWriteStubSaveRegisters(
      dst_addr, scratch, RememberedSetAction::kEmit, SaveFPRegsMode::kSave,
      StubCallMode::kCallWasmRuntimeStub);
  bind(&exit);
}

void LiftoffAssembler::AtomicLoad(LiftoffRegister dst, Register src_addr,
                                  Register offset_reg, uintptr_t offset_imm,
                                  LoadType type, LiftoffRegList pinned) {
  Load(dst, src_addr, offset_reg, offset_imm, type, pinned, nullptr, true);
}

void LiftoffAssembler::Load(LiftoffRegister dst, Register src_addr,
                            Register offset_reg, uintptr_t offset_imm,
                            LoadType type, LiftoffRegList pinned,
                            uint32_t* protected_load_pc, bool is_load_mem,
                            bool i64_offset) {
  if (offset_reg != no_reg && !i64_offset) {
    AssertZeroExtended(offset_reg);
  }
  Operand src_op = liftoff::GetMemOp(this, src_addr, offset_reg, offset_imm);
  if (protected_load_pc) *protected_load_pc = pc_offset();
  switch (type.value()) {
    case LoadType::kI32Load8U:
    case LoadType::kI64Load8U:
      movzxbl(dst.gp(), src_op);
      break;
    case LoadType::kI32Load8S:
      movsxbl(dst.gp(), src_op);
      break;
    case LoadType::kI64Load8S:
      movsxbq(dst.gp(), src_op);
      break;
    case LoadType::kI32Load16U:
    case LoadType::kI64Load16U:
      movzxwl(dst.gp(), src_op);
      break;
    case LoadType::kI32Load16S:
      movsxwl(dst.gp(), src_op);
      break;
    case LoadType::kI64Load16S:
      movsxwq(dst.gp(), src_op);
      break;
    case LoadType::kI32Load:
    case LoadType::kI64Load32U:
      movl(dst.gp(), src_op);
      break;
    case LoadType::kI64Load32S:
      movsxlq(dst.gp(), src_op);
      break;
    case LoadType::kI64Load:
      movq(dst.gp(), src_op);
      break;
    case LoadType::kF32Load:
      Movss(dst.fp(), src_op);
      break;
    case LoadType::kF64Load:
      Movsd(dst.fp(), src_op);
      break;
    case LoadType::kS128Load:
      Movdqu(dst.fp(), src_op);
      break;
  }
}

void LiftoffAssembler::Store(Register dst_addr, Register offset_reg,
                             uintptr_t offset_imm, LiftoffRegister src,
                             StoreType type, LiftoffRegList /* pinned */,
                             uint32_t* protected_store_pc, bool is_store_mem) {
  Operand dst_op = liftoff::GetMemOp(this, dst_addr, offset_reg, offset_imm);
  if (protected_store_pc) *protected_store_pc = pc_offset();
  switch (type.value()) {
    case StoreType::kI32Store8:
    case StoreType::kI64Store8:
      movb(dst_op, src.gp());
      break;
    case StoreType::kI32Store16:
    case StoreType::kI64Store16:
      movw(dst_op, src.gp());
      break;
    case StoreType::kI32Store:
    case StoreType::kI64Store32:
      movl(dst_op, src.gp());
      break;
    case StoreType::kI64Store:
      movq(dst_op, src.gp());
      break;
    case StoreType::kF32Store:
      Movss(dst_op, src.fp());
      break;
    case StoreType::kF64Store:
      Movsd(dst_op, src.fp());
      break;
    case StoreType::kS128Store:
      Movdqu(dst_op, src.fp());
      break;
  }
}

void LiftoffAssembler::AtomicStore(Register dst_addr, Register offset_reg,
                                   uintptr_t offset_imm, LiftoffRegister src,
                                   StoreType type, LiftoffRegList pinned) {
  Operand dst_op = liftoff::GetMemOp(this, dst_addr, offset_reg, offset_imm);
  Register src_reg = src.gp();
  if (cache_state()->is_used(src)) {
    movq(kScratchRegister, src_reg);
    src_reg = kScratchRegister;
  }
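  // {xchg} with a memory operand is implicitly locked on x64, so no explicit
  // lock prefix is needed for the sequentially consistent store below.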
  switch (type.value()) {
    case StoreType::kI32Store8:
    case StoreType::kI64Store8:
      xchgb(src_reg, dst_op);
      break;
    case StoreType::kI32Store16:
    case StoreType::kI64Store16:
      xchgw(src_reg, dst_op);
      break;
    case StoreType::kI32Store:
    case StoreType::kI64Store32:
      xchgl(src_reg, dst_op);
      break;
    case StoreType::kI64Store:
      xchgq(src_reg, dst_op);
      break;
    default:
      UNREACHABLE();
  }
}

void LiftoffAssembler::AtomicAdd(Register dst_addr, Register offset_reg,
                                 uintptr_t offset_imm, LiftoffRegister value,
                                 LiftoffRegister result, StoreType type) {
  DCHECK(!cache_state()->is_used(result));
  if (cache_state()->is_used(value)) {
    // We cannot overwrite {value}, but the {value} register is changed in the
    // code we generate. Therefore we copy {value} to {result} and use the
    // {result} register in the code below.
    movq(result.gp(), value.gp());
    value = result;
  }
  Operand dst_op = liftoff::GetMemOp(this, dst_addr, offset_reg, offset_imm);
  lock();
  switch (type.value()) {
    case StoreType::kI32Store8:
    case StoreType::kI64Store8:
      xaddb(dst_op, value.gp());
      movzxbq(result.gp(), value.gp());
      break;
    case StoreType::kI32Store16:
    case StoreType::kI64Store16:
      xaddw(dst_op, value.gp());
      movzxwq(result.gp(), value.gp());
      break;
    case StoreType::kI32Store:
    case StoreType::kI64Store32:
      xaddl(dst_op, value.gp());
      if (value != result) {
        movq(result.gp(), value.gp());
      }
      break;
    case StoreType::kI64Store:
      xaddq(dst_op, value.gp());
      if (value != result) {
        movq(result.gp(), value.gp());
      }
      break;
    default:
      UNREACHABLE();
  }
}

void LiftoffAssembler::AtomicSub(Register dst_addr, Register offset_reg,
                                 uintptr_t offset_imm, LiftoffRegister value,
                                 LiftoffRegister result, StoreType type) {
  DCHECK(!cache_state()->is_used(result));
  if (cache_state()->is_used(value)) {
    // We cannot overwrite {value}, but the {value} register is changed in the
    // code we generate. Therefore we copy {value} to {result} and use the
    // {result} register in the code below.
    movq(result.gp(), value.gp());
    value = result;
  }
  Operand dst_op = liftoff::GetMemOp(this, dst_addr, offset_reg, offset_imm);
  switch (type.value()) {
    case StoreType::kI32Store8:
    case StoreType::kI64Store8:
      negb(value.gp());
      lock();
      xaddb(dst_op, value.gp());
      movzxbq(result.gp(), value.gp());
      break;
    case StoreType::kI32Store16:
    case StoreType::kI64Store16:
      negw(value.gp());
      lock();
      xaddw(dst_op, value.gp());
      movzxwq(result.gp(), value.gp());
      break;
    case StoreType::kI32Store:
    case StoreType::kI64Store32:
      negl(value.gp());
      lock();
      xaddl(dst_op, value.gp());
      if (value != result) {
        movq(result.gp(), value.gp());
      }
      break;
    case StoreType::kI64Store:
      negq(value.gp());
      lock();
      xaddq(dst_op, value.gp());
      if (value != result) {
        movq(result.gp(), value.gp());
      }
      break;
    default:
      UNREACHABLE();
  }
}

namespace liftoff {
#define __ lasm->

inline void AtomicBinop(LiftoffAssembler* lasm,
                        void (Assembler::*opl)(Register, Register),
                        void (Assembler::*opq)(Register, Register),
                        Register dst_addr, Register offset_reg,
                        uintptr_t offset_imm, LiftoffRegister value,
                        LiftoffRegister result, StoreType type) {
  DCHECK(!__ cache_state()->is_used(result));
  Register value_reg = value.gp();
  // The cmpxchg instruction uses rax to store the old value of the
  // compare-exchange primitive. Therefore we have to spill the register and
  // move any use to another register.
  LiftoffRegList pinned =
      LiftoffRegList::ForRegs(dst_addr, offset_reg, value_reg);
  __ ClearRegister(rax, {&dst_addr, &offset_reg, &value_reg}, pinned);
  Operand dst_op = liftoff::GetMemOp(lasm, dst_addr, offset_reg, offset_imm);

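  // Each case below emits a compare-exchange loop: load the old value into
  // rax, apply the binop into kScratchRegister, then lock-cmpxchg it back,
  // retrying until no other thread modified the location in between.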
  switch (type.value()) {
    case StoreType::kI32Store8:
    case StoreType::kI64Store8: {
      Label binop;
      __ xorq(rax, rax);
      __ movb(rax, dst_op);
      __ bind(&binop);
      __ movl(kScratchRegister, rax);
      (lasm->*opl)(kScratchRegister, value_reg);
      __ lock();
      __ cmpxchgb(dst_op, kScratchRegister);
      __ j(not_equal, &binop);
      break;
    }
    case StoreType::kI32Store16:
    case StoreType::kI64Store16: {
      Label binop;
      __ xorq(rax, rax);
      __ movw(rax, dst_op);
      __ bind(&binop);
      __ movl(kScratchRegister, rax);
      (lasm->*opl)(kScratchRegister, value_reg);
      __ lock();
      __ cmpxchgw(dst_op, kScratchRegister);
      __ j(not_equal, &binop);
      break;
    }
    case StoreType::kI32Store:
    case StoreType::kI64Store32: {
      Label binop;
      __ movl(rax, dst_op);
      __ bind(&binop);
      __ movl(kScratchRegister, rax);
      (lasm->*opl)(kScratchRegister, value_reg);
      __ lock();
      __ cmpxchgl(dst_op, kScratchRegister);
      __ j(not_equal, &binop);
      break;
    }
    case StoreType::kI64Store: {
      Label binop;
      __ movq(rax, dst_op);
      __ bind(&binop);
      __ movq(kScratchRegister, rax);
      (lasm->*opq)(kScratchRegister, value_reg);
      __ lock();
      __ cmpxchgq(dst_op, kScratchRegister);
      __ j(not_equal, &binop);
      break;
    }
    default:
      UNREACHABLE();
  }

  if (result.gp() != rax) {
    __ movq(result.gp(), rax);
  }
}
#undef __
}  // namespace liftoff

void LiftoffAssembler::AtomicAnd(Register dst_addr, Register offset_reg,
                                 uintptr_t offset_imm, LiftoffRegister value,
                                 LiftoffRegister result, StoreType type) {
  liftoff::AtomicBinop(this, &Assembler::andl, &Assembler::andq, dst_addr,
                       offset_reg, offset_imm, value, result, type);
}

void LiftoffAssembler::AtomicOr(Register dst_addr, Register offset_reg,
                                uintptr_t offset_imm, LiftoffRegister value,
                                LiftoffRegister result, StoreType type) {
  liftoff::AtomicBinop(this, &Assembler::orl, &Assembler::orq, dst_addr,
                       offset_reg, offset_imm, value, result, type);
}

void LiftoffAssembler::AtomicXor(Register dst_addr, Register offset_reg,
                                 uintptr_t offset_imm, LiftoffRegister value,
                                 LiftoffRegister result, StoreType type) {
  liftoff::AtomicBinop(this, &Assembler::xorl, &Assembler::xorq, dst_addr,
                       offset_reg, offset_imm, value, result, type);
}

void LiftoffAssembler::AtomicExchange(Register dst_addr, Register offset_reg,
                                      uintptr_t offset_imm,
                                      LiftoffRegister value,
                                      LiftoffRegister result, StoreType type) {
  DCHECK(!cache_state()->is_used(result));
  if (cache_state()->is_used(value)) {
    // We cannot overwrite {value}, but the {value} register is changed in the
    // code we generate. Therefore we copy {value} to {result} and use the
    // {result} register in the code below.
    movq(result.gp(), value.gp());
    value = result;
  }
  Operand dst_op = liftoff::GetMemOp(this, dst_addr, offset_reg, offset_imm);
  switch (type.value()) {
    case StoreType::kI32Store8:
    case StoreType::kI64Store8:
      xchgb(value.gp(), dst_op);
      movzxbq(result.gp(), value.gp());
      break;
    case StoreType::kI32Store16:
    case StoreType::kI64Store16:
      xchgw(value.gp(), dst_op);
      movzxwq(result.gp(), value.gp());
      break;
    case StoreType::kI32Store:
    case StoreType::kI64Store32:
      xchgl(value.gp(), dst_op);
      if (value != result) {
        movq(result.gp(), value.gp());
      }
      break;
    case StoreType::kI64Store:
      xchgq(value.gp(), dst_op);
      if (value != result) {
        movq(result.gp(), value.gp());
      }
      break;
    default:
      UNREACHABLE();
  }
}

void LiftoffAssembler::AtomicCompareExchange(
    Register dst_addr, Register offset_reg, uintptr_t offset_imm,
    LiftoffRegister expected, LiftoffRegister new_value, LiftoffRegister result,
    StoreType type) {
  Register value_reg = new_value.gp();
  // The cmpxchg instruction uses rax to store the old value of the
  // compare-exchange primitive. Therefore we have to spill the register and
  // move any use to another register.
  LiftoffRegList pinned =
      LiftoffRegList::ForRegs(dst_addr, offset_reg, expected, value_reg);
  ClearRegister(rax, {&dst_addr, &offset_reg, &value_reg}, pinned);
  if (expected.gp() != rax) {
    movq(rax, expected.gp());
  }

  Operand dst_op = liftoff::GetMemOp(this, dst_addr, offset_reg, offset_imm);

  lock();
  switch (type.value()) {
    case StoreType::kI32Store8:
    case StoreType::kI64Store8: {
      cmpxchgb(dst_op, value_reg);
      movzxbq(result.gp(), rax);
      break;
    }
    case StoreType::kI32Store16:
    case StoreType::kI64Store16: {
      cmpxchgw(dst_op, value_reg);
      movzxwq(result.gp(), rax);
      break;
    }
    case StoreType::kI32Store: {
      cmpxchgl(dst_op, value_reg);
      if (result.gp() != rax) {
        movl(result.gp(), rax);
      }
      break;
    }
    case StoreType::kI64Store32: {
      cmpxchgl(dst_op, value_reg);
      // Zero extension.
      movl(result.gp(), rax);
      break;
    }
    case StoreType::kI64Store: {
      cmpxchgq(dst_op, value_reg);
      if (result.gp() != rax) {
        movq(result.gp(), rax);
      }
      break;
    }
    default:
      UNREACHABLE();
  }
}

void LiftoffAssembler::AtomicFence() { mfence(); }

void LiftoffAssembler::LoadCallerFrameSlot(LiftoffRegister dst,
                                           uint32_t caller_slot_idx,
                                           ValueKind kind) {
  Operand src(rbp, kSystemPointerSize * (caller_slot_idx + 1));
  liftoff::Load(this, dst, src, kind);
}

void LiftoffAssembler::StoreCallerFrameSlot(LiftoffRegister src,
                                            uint32_t caller_slot_idx,
                                            ValueKind kind) {
  Operand dst(rbp, kSystemPointerSize * (caller_slot_idx + 1));
  liftoff::Store(this, dst, src, kind);
}

void LiftoffAssembler::LoadReturnStackSlot(LiftoffRegister reg, int offset,
                                           ValueKind kind) {
  Operand src(rsp, offset);
  liftoff::Load(this, reg, src, kind);
}

void LiftoffAssembler::MoveStackValue(uint32_t dst_offset, uint32_t src_offset,
                                      ValueKind kind) {
  DCHECK_NE(dst_offset, src_offset);
  Operand dst = liftoff::GetStackSlot(dst_offset);
  Operand src = liftoff::GetStackSlot(src_offset);
  if (element_size_log2(kind) == 2) {
    movl(kScratchRegister, src);
    movl(dst, kScratchRegister);
  } else {
    DCHECK_EQ(3, element_size_log2(kind));
    movq(kScratchRegister, src);
    movq(dst, kScratchRegister);
  }
}

void LiftoffAssembler::Move(Register dst, Register src, ValueKind kind) {
  DCHECK_NE(dst, src);
  if (kind == kI32) {
    movl(dst, src);
  } else {
    DCHECK(kI64 == kind || is_reference(kind));
    movq(dst, src);
  }
}

void LiftoffAssembler::Move(DoubleRegister dst, DoubleRegister src,
                            ValueKind kind) {
  DCHECK_NE(dst, src);
  if (kind == kF32) {
    Movss(dst, src);
  } else if (kind == kF64) {
    Movsd(dst, src);
  } else {
    DCHECK_EQ(kS128, kind);
    Movapd(dst, src);
  }
}

void LiftoffAssembler::Spill(int offset, LiftoffRegister reg, ValueKind kind) {
  RecordUsedSpillOffset(offset);
  Operand dst = liftoff::GetStackSlot(offset);
  switch (kind) {
    case kI32:
      movl(dst, reg.gp());
      break;
    case kI64:
    case kOptRef:
    case kRef:
    case kRtt:
    case kRttWithDepth:
      movq(dst, reg.gp());
      break;
    case kF32:
      Movss(dst, reg.fp());
      break;
    case kF64:
      Movsd(dst, reg.fp());
      break;
    case kS128:
      Movdqu(dst, reg.fp());
      break;
    default:
      UNREACHABLE();
  }
}

void LiftoffAssembler::Spill(int offset, WasmValue value) {
  RecordUsedSpillOffset(offset);
  Operand dst = liftoff::GetStackSlot(offset);
  switch (value.type().kind()) {
    case kI32:
      movl(dst, Immediate(value.to_i32()));
      break;
    case kI64: {
      if (is_int32(value.to_i64())) {
        // Sign extend low word.
        movq(dst, Immediate(static_cast<int32_t>(value.to_i64())));
      } else if (is_uint32(value.to_i64())) {
        // Zero extend low word.
        movl(kScratchRegister, Immediate(static_cast<int32_t>(value.to_i64())));
        movq(dst, kScratchRegister);
      } else {
        movq(kScratchRegister, value.to_i64());
        movq(dst, kScratchRegister);
      }
      break;
    }
    default:
      // We do not track f32 and f64 constants, hence they are unreachable.
      UNREACHABLE();
  }
}

void LiftoffAssembler::Fill(LiftoffRegister reg, int offset, ValueKind kind) {
  liftoff::Load(this, reg, liftoff::GetStackSlot(offset), kind);
}

void LiftoffAssembler::FillI64Half(Register, int offset, RegPairHalf) {
  UNREACHABLE();
}

void LiftoffAssembler::FillStackSlotsWithZero(int start, int size) {
  DCHECK_LT(0, size);
  RecordUsedSpillOffset(start + size);

  if (size <= 3 * kStackSlotSize) {
    // Special straight-line code for up to three slots
    // (7-10 bytes per slot: REX C7 <1-4 bytes op> <4 bytes imm>),
    // and a movl (6-9 bytes) when size % 8 != 0.
    uint32_t remainder = size;
    for (; remainder >= kStackSlotSize; remainder -= kStackSlotSize) {
      movq(liftoff::GetStackSlot(start + remainder), Immediate(0));
    }
    DCHECK(remainder == 4 || remainder == 0);
    if (remainder) {
      movl(liftoff::GetStackSlot(start + remainder), Immediate(0));
    }
  } else {
    // General case for bigger counts.
    // This sequence takes 19-22 bytes (3 for pushes, 4-7 for lea, 2 for xor, 5
    // for mov, 2 for repstosl, 3 for pops).
    pushq(rax);
    pushq(rcx);
    pushq(rdi);
    leaq(rdi, liftoff::GetStackSlot(start + size));
    xorl(rax, rax);
    // Convert size (bytes) to doublewords (4-bytes).
    movl(rcx, Immediate(size / 4));
    repstosl();
    popq(rdi);
    popq(rcx);
    popq(rax);
  }
}

void LiftoffAssembler::emit_i32_add(Register dst, Register lhs, Register rhs) {
  if (lhs != dst) {
    leal(dst, Operand(lhs, rhs, times_1, 0));
  } else {
    addl(dst, rhs);
  }
}

void LiftoffAssembler::emit_i32_addi(Register dst, Register lhs, int32_t imm) {
  if (lhs != dst) {
    leal(dst, Operand(lhs, imm));
  } else {
    addl(dst, Immediate(imm));
  }
}

void LiftoffAssembler::emit_i32_sub(Register dst, Register lhs, Register rhs) {
  if (dst != rhs) {
    // Default path.
    if (dst != lhs) movl(dst, lhs);
    subl(dst, rhs);
  } else if (lhs == rhs) {
    // Degenerate case.
    xorl(dst, dst);
  } else {
    // Emit {dst = lhs + -rhs} if dst == rhs.
    negl(dst);
    addl(dst, lhs);
  }
}

void LiftoffAssembler::emit_i32_subi(Register dst, Register lhs, int32_t imm) {
  if (dst != lhs) {
    // We'll have to implement an UB-safe version if we need this corner case.
    DCHECK_NE(imm, kMinInt);
    leal(dst, Operand(lhs, -imm));
  } else {
    subl(dst, Immediate(imm));
  }
}

namespace liftoff {
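// For commutative ops, when {dst} aliases {rhs} the operands can simply be
// applied in swapped order, saving a move through a temporary register.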
template <void (Assembler::*op)(Register, Register),
          void (Assembler::*mov)(Register, Register)>
void EmitCommutativeBinOp(LiftoffAssembler* assm, Register dst, Register lhs,
                          Register rhs) {
  if (dst == rhs) {
    (assm->*op)(dst, lhs);
  } else {
    if (dst != lhs) (assm->*mov)(dst, lhs);
    (assm->*op)(dst, rhs);
  }
}

template <void (Assembler::*op)(Register, Immediate),
          void (Assembler::*mov)(Register, Register)>
void EmitCommutativeBinOpImm(LiftoffAssembler* assm, Register dst, Register lhs,
                             int32_t imm) {
  if (dst != lhs) (assm->*mov)(dst, lhs);
  (assm->*op)(dst, Immediate(imm));
}

}  // namespace liftoff

void LiftoffAssembler::emit_i32_mul(Register dst, Register lhs, Register rhs) {
  liftoff::EmitCommutativeBinOp<&Assembler::imull, &Assembler::movl>(this, dst,
                                                                     lhs, rhs);
}

namespace liftoff {
enum class DivOrRem : uint8_t { kDiv, kRem };
template <typename type, DivOrRem div_or_rem>
void EmitIntDivOrRem(LiftoffAssembler* assm, Register dst, Register lhs,
                     Register rhs, Label* trap_div_by_zero,
                     Label* trap_div_unrepresentable) {
  constexpr bool needs_unrepresentable_check =
      std::is_signed<type>::value && div_or_rem == DivOrRem::kDiv;
  constexpr bool special_case_minus_1 =
      std::is_signed<type>::value && div_or_rem == DivOrRem::kRem;
  DCHECK_EQ(needs_unrepresentable_check, trap_div_unrepresentable != nullptr);

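// Emit the 32-bit ({name}l) or the 64-bit ({name}q) variant of an
// instruction, depending on the operand size of {type}.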
#define iop(name, ...)            \
  do {                            \
    if (sizeof(type) == 4) {      \
      assm->name##l(__VA_ARGS__); \
    } else {                      \
      assm->name##q(__VA_ARGS__); \
    }                             \
  } while (false)

  // For division, the lhs is always taken from {edx:eax}. Thus, make sure that
  // these registers are unused. If {rhs} is stored in one of them, move it to
  // another temporary register.
  // Do all this before any branch, such that the code is executed
  // unconditionally, as the cache state will also be modified unconditionally.
  assm->SpillRegisters(rdx, rax);
  if (rhs == rax || rhs == rdx) {
    iop(mov, kScratchRegister, rhs);
    rhs = kScratchRegister;
  }

  // Check for division by zero.
  iop(test, rhs, rhs);
  assm->j(zero, trap_div_by_zero);

  Label done;
  if (needs_unrepresentable_check) {
    // Check for {kMinInt / -1}. This is unrepresentable.
    Label do_div;
    iop(cmp, rhs, Immediate(-1));
    assm->j(not_equal, &do_div);
    // {lhs} is min int if {lhs - 1} overflows.
    iop(cmp, lhs, Immediate(1));
    assm->j(overflow, trap_div_unrepresentable);
    assm->bind(&do_div);
  } else if (special_case_minus_1) {
    // {lhs % -1} is always 0 (needs to be special cased because {kMinInt / -1}
    // cannot be computed).
    Label do_rem;
    iop(cmp, rhs, Immediate(-1));
    assm->j(not_equal, &do_rem);
    // clang-format off
    // (conflicts with presubmit checks because it is confused about "xor")
    iop(xor, dst, dst);
    // clang-format on
    assm->jmp(&done);
    assm->bind(&do_rem);
  }

  // Now move {lhs} into {eax}, then zero-extend or sign-extend into {edx}, then
  // do the division.
  if (lhs != rax) iop(mov, rax, lhs);
  if (std::is_same<int32_t, type>::value) {  // i32
    assm->cdq();
    assm->idivl(rhs);
  } else if (std::is_same<uint32_t, type>::value) {  // u32
    assm->xorl(rdx, rdx);
    assm->divl(rhs);
  } else if (std::is_same<int64_t, type>::value) {  // i64
    assm->cqo();
    assm->idivq(rhs);
  } else {  // u64
    assm->xorq(rdx, rdx);
    assm->divq(rhs);
  }

  // Move back the result (in {eax} or {edx}) into the {dst} register.
  constexpr Register kResultReg = div_or_rem == DivOrRem::kDiv ? rax : rdx;
  if (dst != kResultReg) {
    iop(mov, dst, kResultReg);
  }
  if (special_case_minus_1) assm->bind(&done);
}
}  // namespace liftoff

void LiftoffAssembler::emit_i32_divs(Register dst, Register lhs, Register rhs,
                                     Label* trap_div_by_zero,
                                     Label* trap_div_unrepresentable) {
  liftoff::EmitIntDivOrRem<int32_t, liftoff::DivOrRem::kDiv>(
      this, dst, lhs, rhs, trap_div_by_zero, trap_div_unrepresentable);
}

void LiftoffAssembler::emit_i32_divu(Register dst, Register lhs, Register rhs,
                                     Label* trap_div_by_zero) {
  liftoff::EmitIntDivOrRem<uint32_t, liftoff::DivOrRem::kDiv>(
      this, dst, lhs, rhs, trap_div_by_zero, nullptr);
}

void LiftoffAssembler::emit_i32_rems(Register dst, Register lhs, Register rhs,
                                     Label* trap_div_by_zero) {
  liftoff::EmitIntDivOrRem<int32_t, liftoff::DivOrRem::kRem>(
      this, dst, lhs, rhs, trap_div_by_zero, nullptr);
}

void LiftoffAssembler::emit_i32_remu(Register dst, Register lhs, Register rhs,
                                     Label* trap_div_by_zero) {
  liftoff::EmitIntDivOrRem<uint32_t, liftoff::DivOrRem::kRem>(
      this, dst, lhs, rhs, trap_div_by_zero, nullptr);
}

void LiftoffAssembler::emit_i32_and(Register dst, Register lhs, Register rhs) {
  liftoff::EmitCommutativeBinOp<&Assembler::andl, &Assembler::movl>(this, dst,
                                                                    lhs, rhs);
}

void LiftoffAssembler::emit_i32_andi(Register dst, Register lhs, int32_t imm) {
  liftoff::EmitCommutativeBinOpImm<&Assembler::andl, &Assembler::movl>(
      this, dst, lhs, imm);
}

void LiftoffAssembler::emit_i32_or(Register dst, Register lhs, Register rhs) {
  liftoff::EmitCommutativeBinOp<&Assembler::orl, &Assembler::movl>(this, dst,
                                                                   lhs, rhs);
}

void LiftoffAssembler::emit_i32_ori(Register dst, Register lhs, int32_t imm) {
  liftoff::EmitCommutativeBinOpImm<&Assembler::orl, &Assembler::movl>(this, dst,
                                                                      lhs, imm);
}

void LiftoffAssembler::emit_i32_xor(Register dst, Register lhs, Register rhs) {
  liftoff::EmitCommutativeBinOp<&Assembler::xorl, &Assembler::movl>(this, dst,
                                                                    lhs, rhs);
}

void LiftoffAssembler::emit_i32_xori(Register dst, Register lhs, int32_t imm) {
  liftoff::EmitCommutativeBinOpImm<&Assembler::xorl, &Assembler::movl>(
      this, dst, lhs, imm);
}

namespace liftoff {
template <ValueKind kind>
inline void EmitShiftOperation(LiftoffAssembler* assm, Register dst,
                               Register src, Register amount,
                               void (Assembler::*emit_shift)(Register)) {
  // If dst is rcx, compute into the scratch register first, then move to rcx.
  if (dst == rcx) {
    assm->Move(kScratchRegister, src, kind);
    if (amount != rcx) assm->Move(rcx, amount, kind);
    (assm->*emit_shift)(kScratchRegister);
    assm->Move(rcx, kScratchRegister, kind);
    return;
  }

  // Move amount into rcx. If rcx is in use, move its content into the scratch
  // register. If src is rcx, src is now the scratch register.
  bool use_scratch = false;
  if (amount != rcx) {
    use_scratch =
        src == rcx || assm->cache_state()->is_used(LiftoffRegister(rcx));
    if (use_scratch) assm->movq(kScratchRegister, rcx);
    if (src == rcx) src = kScratchRegister;
    assm->Move(rcx, amount, kind);
  }

  // Do the actual shift.
  if (dst != src) assm->Move(dst, src, kind);
  (assm->*emit_shift)(dst);

  // Restore rcx if needed.
  if (use_scratch) assm->movq(rcx, kScratchRegister);
}
}  // namespace liftoff
1249 
emit_i32_shl(Register dst,Register src,Register amount)1250 void LiftoffAssembler::emit_i32_shl(Register dst, Register src,
1251                                     Register amount) {
1252   liftoff::EmitShiftOperation<kI32>(this, dst, src, amount,
1253                                     &Assembler::shll_cl);
1254 }
1255 
emit_i32_shli(Register dst,Register src,int32_t amount)1256 void LiftoffAssembler::emit_i32_shli(Register dst, Register src,
1257                                      int32_t amount) {
1258   if (dst != src) movl(dst, src);
1259   shll(dst, Immediate(amount & 31));
1260 }
1261 
emit_i32_sar(Register dst,Register src,Register amount)1262 void LiftoffAssembler::emit_i32_sar(Register dst, Register src,
1263                                     Register amount) {
1264   liftoff::EmitShiftOperation<kI32>(this, dst, src, amount,
1265                                     &Assembler::sarl_cl);
1266 }
1267 
emit_i32_sari(Register dst,Register src,int32_t amount)1268 void LiftoffAssembler::emit_i32_sari(Register dst, Register src,
1269                                      int32_t amount) {
1270   if (dst != src) movl(dst, src);
1271   sarl(dst, Immediate(amount & 31));
1272 }
1273 
emit_i32_shr(Register dst,Register src,Register amount)1274 void LiftoffAssembler::emit_i32_shr(Register dst, Register src,
1275                                     Register amount) {
1276   liftoff::EmitShiftOperation<kI32>(this, dst, src, amount,
1277                                     &Assembler::shrl_cl);
1278 }
1279 
emit_i32_shri(Register dst,Register src,int32_t amount)1280 void LiftoffAssembler::emit_i32_shri(Register dst, Register src,
1281                                      int32_t amount) {
1282   if (dst != src) movl(dst, src);
1283   shrl(dst, Immediate(amount & 31));
1284 }
1285 
emit_i32_clz(Register dst,Register src)1286 void LiftoffAssembler::emit_i32_clz(Register dst, Register src) {
1287   Lzcntl(dst, src);
1288 }
1289 
emit_i32_ctz(Register dst,Register src)1290 void LiftoffAssembler::emit_i32_ctz(Register dst, Register src) {
1291   Tzcntl(dst, src);
1292 }
1293 
emit_i32_popcnt(Register dst,Register src)1294 bool LiftoffAssembler::emit_i32_popcnt(Register dst, Register src) {
1295   if (!CpuFeatures::IsSupported(POPCNT)) return false;
1296   CpuFeatureScope scope(this, POPCNT);
1297   popcntl(dst, src);
1298   return true;
1299 }
1300 
emit_i64_add(LiftoffRegister dst,LiftoffRegister lhs,LiftoffRegister rhs)1301 void LiftoffAssembler::emit_i64_add(LiftoffRegister dst, LiftoffRegister lhs,
1302                                     LiftoffRegister rhs) {
1303   if (lhs.gp() != dst.gp()) {
1304     leaq(dst.gp(), Operand(lhs.gp(), rhs.gp(), times_1, 0));
1305   } else {
1306     addq(dst.gp(), rhs.gp());
1307   }
1308 }
1309 
emit_i64_addi(LiftoffRegister dst,LiftoffRegister lhs,int64_t imm)1310 void LiftoffAssembler::emit_i64_addi(LiftoffRegister dst, LiftoffRegister lhs,
1311                                      int64_t imm) {
1312   if (!is_int32(imm)) {
1313     TurboAssembler::Move(kScratchRegister, imm);
1314     if (lhs.gp() == dst.gp()) {
1315       addq(dst.gp(), kScratchRegister);
1316     } else {
1317       leaq(dst.gp(), Operand(lhs.gp(), kScratchRegister, times_1, 0));
1318     }
1319   } else if (lhs.gp() == dst.gp()) {
1320     addq(dst.gp(), Immediate(static_cast<int32_t>(imm)));
1321   } else {
1322     leaq(dst.gp(), Operand(lhs.gp(), static_cast<int32_t>(imm)));
1323   }
1324 }
1325 
emit_i64_sub(LiftoffRegister dst,LiftoffRegister lhs,LiftoffRegister rhs)1326 void LiftoffAssembler::emit_i64_sub(LiftoffRegister dst, LiftoffRegister lhs,
1327                                     LiftoffRegister rhs) {
1328   if (lhs.gp() == rhs.gp()) {
1329     xorq(dst.gp(), dst.gp());
1330   } else if (dst.gp() == rhs.gp()) {
1331     negq(dst.gp());
1332     addq(dst.gp(), lhs.gp());
1333   } else {
1334     if (dst.gp() != lhs.gp()) movq(dst.gp(), lhs.gp());
1335     subq(dst.gp(), rhs.gp());
1336   }
1337 }
1338 
void LiftoffAssembler::emit_i64_mul(LiftoffRegister dst, LiftoffRegister lhs,
                                    LiftoffRegister rhs) {
  liftoff::EmitCommutativeBinOp<&Assembler::imulq, &Assembler::movq>(
      this, dst.gp(), lhs.gp(), rhs.gp());
}

bool LiftoffAssembler::emit_i64_divs(LiftoffRegister dst, LiftoffRegister lhs,
                                     LiftoffRegister rhs,
                                     Label* trap_div_by_zero,
                                     Label* trap_div_unrepresentable) {
  liftoff::EmitIntDivOrRem<int64_t, liftoff::DivOrRem::kDiv>(
      this, dst.gp(), lhs.gp(), rhs.gp(), trap_div_by_zero,
      trap_div_unrepresentable);
  return true;
}

bool LiftoffAssembler::emit_i64_divu(LiftoffRegister dst, LiftoffRegister lhs,
                                     LiftoffRegister rhs,
                                     Label* trap_div_by_zero) {
  liftoff::EmitIntDivOrRem<uint64_t, liftoff::DivOrRem::kDiv>(
      this, dst.gp(), lhs.gp(), rhs.gp(), trap_div_by_zero, nullptr);
  return true;
}

bool LiftoffAssembler::emit_i64_rems(LiftoffRegister dst, LiftoffRegister lhs,
                                     LiftoffRegister rhs,
                                     Label* trap_div_by_zero) {
  liftoff::EmitIntDivOrRem<int64_t, liftoff::DivOrRem::kRem>(
      this, dst.gp(), lhs.gp(), rhs.gp(), trap_div_by_zero, nullptr);
  return true;
}

bool LiftoffAssembler::emit_i64_remu(LiftoffRegister dst, LiftoffRegister lhs,
                                     LiftoffRegister rhs,
                                     Label* trap_div_by_zero) {
  liftoff::EmitIntDivOrRem<uint64_t, liftoff::DivOrRem::kRem>(
      this, dst.gp(), lhs.gp(), rhs.gp(), trap_div_by_zero, nullptr);
  return true;
}

void LiftoffAssembler::emit_i64_and(LiftoffRegister dst, LiftoffRegister lhs,
                                    LiftoffRegister rhs) {
  liftoff::EmitCommutativeBinOp<&Assembler::andq, &Assembler::movq>(
      this, dst.gp(), lhs.gp(), rhs.gp());
}

void LiftoffAssembler::emit_i64_andi(LiftoffRegister dst, LiftoffRegister lhs,
                                     int32_t imm) {
  liftoff::EmitCommutativeBinOpImm<&Assembler::andq, &Assembler::movq>(
      this, dst.gp(), lhs.gp(), imm);
}

void LiftoffAssembler::emit_i64_or(LiftoffRegister dst, LiftoffRegister lhs,
                                   LiftoffRegister rhs) {
  liftoff::EmitCommutativeBinOp<&Assembler::orq, &Assembler::movq>(
      this, dst.gp(), lhs.gp(), rhs.gp());
}

void LiftoffAssembler::emit_i64_ori(LiftoffRegister dst, LiftoffRegister lhs,
                                    int32_t imm) {
  liftoff::EmitCommutativeBinOpImm<&Assembler::orq, &Assembler::movq>(
      this, dst.gp(), lhs.gp(), imm);
}

void LiftoffAssembler::emit_i64_xor(LiftoffRegister dst, LiftoffRegister lhs,
                                    LiftoffRegister rhs) {
  liftoff::EmitCommutativeBinOp<&Assembler::xorq, &Assembler::movq>(
      this, dst.gp(), lhs.gp(), rhs.gp());
}

void LiftoffAssembler::emit_i64_xori(LiftoffRegister dst, LiftoffRegister lhs,
                                     int32_t imm) {
  liftoff::EmitCommutativeBinOpImm<&Assembler::xorq, &Assembler::movq>(
      this, dst.gp(), lhs.gp(), imm);
}

void LiftoffAssembler::emit_i64_shl(LiftoffRegister dst, LiftoffRegister src,
                                    Register amount) {
  liftoff::EmitShiftOperation<kI64>(this, dst.gp(), src.gp(), amount,
                                    &Assembler::shlq_cl);
}

void LiftoffAssembler::emit_i64_shli(LiftoffRegister dst, LiftoffRegister src,
                                     int32_t amount) {
  if (dst.gp() != src.gp()) movq(dst.gp(), src.gp());
  shlq(dst.gp(), Immediate(amount & 63));
}

void LiftoffAssembler::emit_i64_sar(LiftoffRegister dst, LiftoffRegister src,
                                    Register amount) {
  liftoff::EmitShiftOperation<kI64>(this, dst.gp(), src.gp(), amount,
                                    &Assembler::sarq_cl);
}

void LiftoffAssembler::emit_i64_sari(LiftoffRegister dst, LiftoffRegister src,
                                     int32_t amount) {
  if (dst.gp() != src.gp()) movq(dst.gp(), src.gp());
  sarq(dst.gp(), Immediate(amount & 63));
}

void LiftoffAssembler::emit_i64_shr(LiftoffRegister dst, LiftoffRegister src,
                                    Register amount) {
  liftoff::EmitShiftOperation<kI64>(this, dst.gp(), src.gp(), amount,
                                    &Assembler::shrq_cl);
}

void LiftoffAssembler::emit_i64_shri(LiftoffRegister dst, LiftoffRegister src,
                                     int32_t amount) {
  if (dst.gp() != src.gp()) movq(dst.gp(), src.gp());
  shrq(dst.gp(), Immediate(amount & 63));
}

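// Illustrative note: wasm defines shift counts modulo the operand width,
// hence the masking with 63 here (and with 31 for the i32 shifts above).
// This matches x64, where shlq/sarq/shrq also only use the low six bits of
// the count.
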
void LiftoffAssembler::emit_i64_clz(LiftoffRegister dst, LiftoffRegister src) {
  Lzcntq(dst.gp(), src.gp());
}

void LiftoffAssembler::emit_i64_ctz(LiftoffRegister dst, LiftoffRegister src) {
  Tzcntq(dst.gp(), src.gp());
}

bool LiftoffAssembler::emit_i64_popcnt(LiftoffRegister dst,
                                       LiftoffRegister src) {
  if (!CpuFeatures::IsSupported(POPCNT)) return false;
  CpuFeatureScope scope(this, POPCNT);
  popcntq(dst.gp(), src.gp());
  return true;
}

void LiftoffAssembler::emit_u32_to_intptr(Register dst, Register src) {
  movl(dst, src);
}

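// Illustrative note: on x64, writing a 32-bit register (movl) implicitly
// zero-extends into the upper 32 bits, so this single move widens an
// unsigned 32-bit value to a pointer-sized value.
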
void LiftoffAssembler::emit_f32_add(DoubleRegister dst, DoubleRegister lhs,
                                    DoubleRegister rhs) {
  if (CpuFeatures::IsSupported(AVX)) {
    CpuFeatureScope scope(this, AVX);
    vaddss(dst, lhs, rhs);
  } else if (dst == rhs) {
    addss(dst, lhs);
  } else {
    if (dst != lhs) movss(dst, lhs);
    addss(dst, rhs);
  }
}

void LiftoffAssembler::emit_f32_sub(DoubleRegister dst, DoubleRegister lhs,
                                    DoubleRegister rhs) {
  if (CpuFeatures::IsSupported(AVX)) {
    CpuFeatureScope scope(this, AVX);
    vsubss(dst, lhs, rhs);
  } else if (dst == rhs) {
    movss(kScratchDoubleReg, rhs);
    movss(dst, lhs);
    subss(dst, kScratchDoubleReg);
  } else {
    if (dst != lhs) movss(dst, lhs);
    subss(dst, rhs);
  }
}

void LiftoffAssembler::emit_f32_mul(DoubleRegister dst, DoubleRegister lhs,
                                    DoubleRegister rhs) {
  if (CpuFeatures::IsSupported(AVX)) {
    CpuFeatureScope scope(this, AVX);
    vmulss(dst, lhs, rhs);
  } else if (dst == rhs) {
    mulss(dst, lhs);
  } else {
    if (dst != lhs) movss(dst, lhs);
    mulss(dst, rhs);
  }
}

void LiftoffAssembler::emit_f32_div(DoubleRegister dst, DoubleRegister lhs,
                                    DoubleRegister rhs) {
  if (CpuFeatures::IsSupported(AVX)) {
    CpuFeatureScope scope(this, AVX);
    vdivss(dst, lhs, rhs);
  } else if (dst == rhs) {
    movss(kScratchDoubleReg, rhs);
    movss(dst, lhs);
    divss(dst, kScratchDoubleReg);
  } else {
    if (dst != lhs) movss(dst, lhs);
    divss(dst, rhs);
  }
}

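// Illustrative note: AVX provides non-destructive three-operand forms
// (dst = lhs op rhs), while the SSE fallbacks are destructive two-operand
// forms. For the non-commutative sub/div with dst aliasing rhs, rhs is
// therefore saved in kScratchDoubleReg first.
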
namespace liftoff {
enum class MinOrMax : uint8_t { kMin, kMax };
template <typename type>
inline void EmitFloatMinOrMax(LiftoffAssembler* assm, DoubleRegister dst,
                              DoubleRegister lhs, DoubleRegister rhs,
                              MinOrMax min_or_max) {
  Label is_nan;
  Label lhs_below_rhs;
  Label lhs_above_rhs;
  Label done;

#define dop(name, ...)            \
  do {                            \
    if (sizeof(type) == 4) {      \
      assm->name##s(__VA_ARGS__); \
    } else {                      \
      assm->name##d(__VA_ARGS__); \
    }                             \
  } while (false)

  // Check the easy cases first: NaN (i.e. unordered), smaller, and greater.
  // NaN has to be checked first, because PF=1 implies CF=1.
  dop(Ucomis, lhs, rhs);
  assm->j(parity_even, &is_nan, Label::kNear);   // PF=1
  assm->j(below, &lhs_below_rhs, Label::kNear);  // CF=1
  assm->j(above, &lhs_above_rhs, Label::kNear);  // CF=0 && ZF=0

  // If we get here, then either
  // a) {lhs == rhs},
  // b) {lhs == -0.0} and {rhs == 0.0}, or
  // c) {lhs == 0.0} and {rhs == -0.0}.
  // For a), it does not matter whether we return {lhs} or {rhs}. Check the
  // sign bit of {rhs} to differentiate b) and c).
  dop(Movmskp, kScratchRegister, rhs);
  assm->testl(kScratchRegister, Immediate(1));
  assm->j(zero, &lhs_below_rhs, Label::kNear);
  assm->jmp(&lhs_above_rhs, Label::kNear);

  assm->bind(&is_nan);
  // Create a NaN output.
  dop(Xorp, dst, dst);
  dop(Divs, dst, dst);
  assm->jmp(&done, Label::kNear);

  assm->bind(&lhs_below_rhs);
  DoubleRegister lhs_below_rhs_src = min_or_max == MinOrMax::kMin ? lhs : rhs;
  if (dst != lhs_below_rhs_src) dop(Movs, dst, lhs_below_rhs_src);
  assm->jmp(&done, Label::kNear);

  assm->bind(&lhs_above_rhs);
  DoubleRegister lhs_above_rhs_src = min_or_max == MinOrMax::kMin ? rhs : lhs;
  if (dst != lhs_above_rhs_src) dop(Movs, dst, lhs_above_rhs_src);

  assm->bind(&done);
}
#undef dop
}  // namespace liftoff

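// Illustrative note: for ucomiss/ucomisd, an unordered comparison (at least
// one NaN operand) sets ZF=PF=CF=1, which is why PF must be tested first.
// The Movmskp sign test handles signed zeros: e.g. min(0.0, -0.0) must
// return -0.0 even though the operands compare equal.
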
void LiftoffAssembler::emit_f32_min(DoubleRegister dst, DoubleRegister lhs,
                                    DoubleRegister rhs) {
  liftoff::EmitFloatMinOrMax<float>(this, dst, lhs, rhs,
                                    liftoff::MinOrMax::kMin);
}

void LiftoffAssembler::emit_f32_max(DoubleRegister dst, DoubleRegister lhs,
                                    DoubleRegister rhs) {
  liftoff::EmitFloatMinOrMax<float>(this, dst, lhs, rhs,
                                    liftoff::MinOrMax::kMax);
}

void LiftoffAssembler::emit_f32_copysign(DoubleRegister dst, DoubleRegister lhs,
                                         DoubleRegister rhs) {
  static constexpr int kF32SignBit = 1 << 31;
  Movd(kScratchRegister, lhs);
  andl(kScratchRegister, Immediate(~kF32SignBit));
  Movd(liftoff::kScratchRegister2, rhs);
  andl(liftoff::kScratchRegister2, Immediate(kF32SignBit));
  orl(kScratchRegister, liftoff::kScratchRegister2);
  Movd(dst, kScratchRegister);
}

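// Worked example (illustrative): copysign(1.0f, -2.0f):
//   bits(1.0f)  = 0x3F800000, sign cleared  -> 0x3F800000
//   bits(-2.0f) = 0xC0000000, sign isolated -> 0x80000000
//   OR          = 0xBF800000 = bits(-1.0f)
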
void LiftoffAssembler::emit_f32_abs(DoubleRegister dst, DoubleRegister src) {
  static constexpr uint32_t kSignBit = uint32_t{1} << 31;
  if (dst == src) {
    TurboAssembler::Move(kScratchDoubleReg, kSignBit - 1);
    Andps(dst, kScratchDoubleReg);
  } else {
    TurboAssembler::Move(dst, kSignBit - 1);
    Andps(dst, src);
  }
}

void LiftoffAssembler::emit_f32_neg(DoubleRegister dst, DoubleRegister src) {
  static constexpr uint32_t kSignBit = uint32_t{1} << 31;
  if (dst == src) {
    TurboAssembler::Move(kScratchDoubleReg, kSignBit);
    Xorps(dst, kScratchDoubleReg);
  } else {
    TurboAssembler::Move(dst, kSignBit);
    Xorps(dst, src);
  }
}

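// Illustrative note: kSignBit - 1 is the 0x7FFFFFFF mask that clears the
// sign bit (abs, via andps), while kSignBit itself flips it (neg, via
// xorps). When dst != src, the constant is materialized directly in dst so
// the scratch register is not needed.
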
bool LiftoffAssembler::emit_f32_ceil(DoubleRegister dst, DoubleRegister src) {
  RETURN_FALSE_IF_MISSING_CPU_FEATURE(SSE4_1);
  Roundss(dst, src, kRoundUp);
  return true;
}

bool LiftoffAssembler::emit_f32_floor(DoubleRegister dst, DoubleRegister src) {
  RETURN_FALSE_IF_MISSING_CPU_FEATURE(SSE4_1);
  Roundss(dst, src, kRoundDown);
  return true;
}

bool LiftoffAssembler::emit_f32_trunc(DoubleRegister dst, DoubleRegister src) {
  RETURN_FALSE_IF_MISSING_CPU_FEATURE(SSE4_1);
  Roundss(dst, src, kRoundToZero);
  return true;
}

bool LiftoffAssembler::emit_f32_nearest_int(DoubleRegister dst,
                                            DoubleRegister src) {
  RETURN_FALSE_IF_MISSING_CPU_FEATURE(SSE4_1);
  Roundss(dst, src, kRoundToNearest);
  return true;
}

void LiftoffAssembler::emit_f32_sqrt(DoubleRegister dst, DoubleRegister src) {
  Sqrtss(dst, src);
}

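// Illustrative note: kRoundUp, kRoundDown, kRoundToZero and kRoundToNearest
// are the immediate rounding-mode encodings of the SSE4.1 roundss
// instruction; kRoundToNearest rounds to the nearest even value, matching
// wasm's f32.nearest semantics.
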
void LiftoffAssembler::emit_f64_add(DoubleRegister dst, DoubleRegister lhs,
                                    DoubleRegister rhs) {
  if (CpuFeatures::IsSupported(AVX)) {
    CpuFeatureScope scope(this, AVX);
    vaddsd(dst, lhs, rhs);
  } else if (dst == rhs) {
    addsd(dst, lhs);
  } else {
    if (dst != lhs) movsd(dst, lhs);
    addsd(dst, rhs);
  }
}

void LiftoffAssembler::emit_f64_sub(DoubleRegister dst, DoubleRegister lhs,
                                    DoubleRegister rhs) {
  if (CpuFeatures::IsSupported(AVX)) {
    CpuFeatureScope scope(this, AVX);
    vsubsd(dst, lhs, rhs);
  } else if (dst == rhs) {
    movsd(kScratchDoubleReg, rhs);
    movsd(dst, lhs);
    subsd(dst, kScratchDoubleReg);
  } else {
    if (dst != lhs) movsd(dst, lhs);
    subsd(dst, rhs);
  }
}

void LiftoffAssembler::emit_f64_mul(DoubleRegister dst, DoubleRegister lhs,
                                    DoubleRegister rhs) {
  if (CpuFeatures::IsSupported(AVX)) {
    CpuFeatureScope scope(this, AVX);
    vmulsd(dst, lhs, rhs);
  } else if (dst == rhs) {
    mulsd(dst, lhs);
  } else {
    if (dst != lhs) movsd(dst, lhs);
    mulsd(dst, rhs);
  }
}

void LiftoffAssembler::emit_f64_div(DoubleRegister dst, DoubleRegister lhs,
                                    DoubleRegister rhs) {
  if (CpuFeatures::IsSupported(AVX)) {
    CpuFeatureScope scope(this, AVX);
    vdivsd(dst, lhs, rhs);
  } else if (dst == rhs) {
    movsd(kScratchDoubleReg, rhs);
    movsd(dst, lhs);
    divsd(dst, kScratchDoubleReg);
  } else {
    if (dst != lhs) movsd(dst, lhs);
    divsd(dst, rhs);
  }
}

void LiftoffAssembler::emit_f64_min(DoubleRegister dst, DoubleRegister lhs,
                                    DoubleRegister rhs) {
  liftoff::EmitFloatMinOrMax<double>(this, dst, lhs, rhs,
                                     liftoff::MinOrMax::kMin);
}

void LiftoffAssembler::emit_f64_copysign(DoubleRegister dst, DoubleRegister lhs,
                                         DoubleRegister rhs) {
  // Extract sign bit from {rhs} into {kScratchRegister2}.
  Movq(liftoff::kScratchRegister2, rhs);
  shrq(liftoff::kScratchRegister2, Immediate(63));
  shlq(liftoff::kScratchRegister2, Immediate(63));
  // Reset sign bit of {lhs} (in {kScratchRegister}).
  Movq(kScratchRegister, lhs);
  btrq(kScratchRegister, Immediate(63));
  // Combine both values into {kScratchRegister} and move into {dst}.
  orq(kScratchRegister, liftoff::kScratchRegister2);
  Movq(dst, kScratchRegister);
}

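// Worked example (illustrative): copysign(1.0, -0.0): shrq/shlq by 63
// isolate the sign of rhs (0x8000000000000000), btrq clears bit 63 of lhs
// (0x3FF0000000000000), and the orq yields 0xBFF0000000000000 = bits(-1.0).
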
void LiftoffAssembler::emit_f64_max(DoubleRegister dst, DoubleRegister lhs,
                                    DoubleRegister rhs) {
  liftoff::EmitFloatMinOrMax<double>(this, dst, lhs, rhs,
                                     liftoff::MinOrMax::kMax);
}

void LiftoffAssembler::emit_f64_abs(DoubleRegister dst, DoubleRegister src) {
  static constexpr uint64_t kSignBit = uint64_t{1} << 63;
  if (dst == src) {
    TurboAssembler::Move(kScratchDoubleReg, kSignBit - 1);
    Andpd(dst, kScratchDoubleReg);
  } else {
    TurboAssembler::Move(dst, kSignBit - 1);
    Andpd(dst, src);
  }
}

void LiftoffAssembler::emit_f64_neg(DoubleRegister dst, DoubleRegister src) {
  static constexpr uint64_t kSignBit = uint64_t{1} << 63;
  if (dst == src) {
    TurboAssembler::Move(kScratchDoubleReg, kSignBit);
    Xorpd(dst, kScratchDoubleReg);
  } else {
    TurboAssembler::Move(dst, kSignBit);
    Xorpd(dst, src);
  }
}

bool LiftoffAssembler::emit_f64_ceil(DoubleRegister dst, DoubleRegister src) {
  RETURN_FALSE_IF_MISSING_CPU_FEATURE(SSE4_1);
  Roundsd(dst, src, kRoundUp);
  return true;
}

bool LiftoffAssembler::emit_f64_floor(DoubleRegister dst, DoubleRegister src) {
  RETURN_FALSE_IF_MISSING_CPU_FEATURE(SSE4_1);
  Roundsd(dst, src, kRoundDown);
  return true;
}

bool LiftoffAssembler::emit_f64_trunc(DoubleRegister dst, DoubleRegister src) {
  RETURN_FALSE_IF_MISSING_CPU_FEATURE(SSE4_1);
  Roundsd(dst, src, kRoundToZero);
  return true;
}

bool LiftoffAssembler::emit_f64_nearest_int(DoubleRegister dst,
                                            DoubleRegister src) {
  RETURN_FALSE_IF_MISSING_CPU_FEATURE(SSE4_1);
  Roundsd(dst, src, kRoundToNearest);
  return true;
}

void LiftoffAssembler::emit_f64_sqrt(DoubleRegister dst, DoubleRegister src) {
  Sqrtsd(dst, src);
}

namespace liftoff {
#define __ assm->
// Used for float to int conversions. If the value in {converted_back} equals
// {src} afterwards, the conversion succeeded.
template <typename dst_type, typename src_type>
inline void ConvertFloatToIntAndBack(LiftoffAssembler* assm, Register dst,
                                     DoubleRegister src,
                                     DoubleRegister converted_back) {
  if (std::is_same<double, src_type>::value) {  // f64
    if (std::is_same<int32_t, dst_type>::value) {  // f64 -> i32
      __ Cvttsd2si(dst, src);
      __ Cvtlsi2sd(converted_back, dst);
    } else if (std::is_same<uint32_t, dst_type>::value) {  // f64 -> u32
      __ Cvttsd2siq(dst, src);
      __ movl(dst, dst);
      __ Cvtqsi2sd(converted_back, dst);
    } else if (std::is_same<int64_t, dst_type>::value) {  // f64 -> i64
      __ Cvttsd2siq(dst, src);
      __ Cvtqsi2sd(converted_back, dst);
    } else {
      UNREACHABLE();
    }
  } else {  // f32
    if (std::is_same<int32_t, dst_type>::value) {  // f32 -> i32
      __ Cvttss2si(dst, src);
      __ Cvtlsi2ss(converted_back, dst);
    } else if (std::is_same<uint32_t, dst_type>::value) {  // f32 -> u32
      __ Cvttss2siq(dst, src);
      __ movl(dst, dst);
      __ Cvtqsi2ss(converted_back, dst);
    } else if (std::is_same<int64_t, dst_type>::value) {  // f32 -> i64
      __ Cvttss2siq(dst, src);
      __ Cvtqsi2ss(converted_back, dst);
    } else {
      UNREACHABLE();
    }
  }
}

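// Illustrative note on the u32 paths above: there is no unsigned 32-bit
// truncating conversion, so the value is truncated to a signed 64-bit
// integer (exact for every in-range u32) and movl(dst, dst) zero-extends the
// low 32 bits. E.g. src = -1.0 truncates to -1; movl leaves 0xFFFFFFFF =
// 4294967295; converting back gives 4294967295.0 != -1.0, exposing the
// out-of-range input to the caller.
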
template <typename dst_type, typename src_type>
inline bool EmitTruncateFloatToInt(LiftoffAssembler* assm, Register dst,
                                   DoubleRegister src, Label* trap) {
  if (!CpuFeatures::IsSupported(SSE4_1)) {
    __ bailout(kMissingCPUFeature, "no SSE4.1");
    return true;
  }
  CpuFeatureScope feature(assm, SSE4_1);

  DoubleRegister rounded = kScratchDoubleReg;
  DoubleRegister converted_back = kScratchDoubleReg2;

  if (std::is_same<double, src_type>::value) {  // f64
    __ Roundsd(rounded, src, kRoundToZero);
  } else {  // f32
    __ Roundss(rounded, src, kRoundToZero);
  }
  ConvertFloatToIntAndBack<dst_type, src_type>(assm, dst, rounded,
                                               converted_back);
  if (std::is_same<double, src_type>::value) {  // f64
    __ Ucomisd(converted_back, rounded);
  } else {  // f32
    __ Ucomiss(converted_back, rounded);
  }

  // Jump to trap if PF is 1 (one of the operands was NaN) or if the values
  // are not equal.
  __ j(parity_even, trap);
  __ j(not_equal, trap);
  return true;
}

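// Worked example (illustrative): i32.trunc_f32_s(2147483648.0f): the
// truncating conversion overflows to INT32_MIN, converting back gives
// -2147483648.0f, the comparison is not equal, and the code jumps to {trap},
// as required for an out-of-range conversion.
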
template <typename dst_type, typename src_type>
inline bool EmitSatTruncateFloatToInt(LiftoffAssembler* assm, Register dst,
                                      DoubleRegister src) {
  if (!CpuFeatures::IsSupported(SSE4_1)) {
    __ bailout(kMissingCPUFeature, "no SSE4.1");
    return true;
  }
  CpuFeatureScope feature(assm, SSE4_1);

  Label done;
  Label not_nan;
  Label src_positive;

  DoubleRegister rounded = kScratchDoubleReg;
  DoubleRegister converted_back = kScratchDoubleReg2;
  DoubleRegister zero_reg = kScratchDoubleReg;

  if (std::is_same<double, src_type>::value) {  // f64
    __ Roundsd(rounded, src, kRoundToZero);
  } else {  // f32
    __ Roundss(rounded, src, kRoundToZero);
  }

  ConvertFloatToIntAndBack<dst_type, src_type>(assm, dst, rounded,
                                               converted_back);
  if (std::is_same<double, src_type>::value) {  // f64
    __ Ucomisd(converted_back, rounded);
  } else {  // f32
    __ Ucomiss(converted_back, rounded);
  }

  // Return 0 if PF is 1 (one of the operands was NaN).
  __ j(parity_odd, &not_nan);
  __ xorl(dst, dst);
  __ jmp(&done);

  __ bind(&not_nan);
  // If rounding is as expected, return the result.
  __ j(equal, &done);

  __ xorpd(zero_reg, zero_reg);

  // If out of bounds, check whether src is positive.
  if (std::is_same<double, src_type>::value) {  // f64
    __ Ucomisd(src, zero_reg);
  } else {  // f32
    __ Ucomiss(src, zero_reg);
  }
  __ j(above, &src_positive);
  if (std::is_same<int32_t, dst_type>::value ||
      std::is_same<uint32_t, dst_type>::value) {  // i32
    __ movl(
        dst,
        Immediate(static_cast<int32_t>(std::numeric_limits<dst_type>::min())));
  } else if (std::is_same<int64_t, dst_type>::value) {  // i64
    __ movq(dst, Immediate64(std::numeric_limits<dst_type>::min()));
  } else {
    UNREACHABLE();
  }
  __ jmp(&done);

  __ bind(&src_positive);
  if (std::is_same<int32_t, dst_type>::value ||
      std::is_same<uint32_t, dst_type>::value) {  // i32
    __ movl(
        dst,
        Immediate(static_cast<int32_t>(std::numeric_limits<dst_type>::max())));
  } else if (std::is_same<int64_t, dst_type>::value) {  // i64
    __ movq(dst, Immediate64(std::numeric_limits<dst_type>::max()));
  } else {
    UNREACHABLE();
  }

  __ bind(&done);
  return true;
}

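// Illustrative note: this implements wasm's saturating truncation, e.g.
// i32.trunc_sat_f32_s(NaN) == 0, (1e10f) == INT32_MAX and (-1e10f) ==
// INT32_MIN. Reusing kScratchDoubleReg for both {rounded} and {zero_reg} is
// fine: {rounded} is dead once the comparison above has set the flags.
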
template <typename src_type>
inline bool EmitSatTruncateFloatToUInt64(LiftoffAssembler* assm, Register dst,
                                         DoubleRegister src) {
  if (!CpuFeatures::IsSupported(SSE4_1)) {
    __ bailout(kMissingCPUFeature, "no SSE4.1");
    return true;
  }
  CpuFeatureScope feature(assm, SSE4_1);

  Label done;
  Label neg_or_nan;
  Label overflow;

  DoubleRegister zero_reg = kScratchDoubleReg;

  __ xorpd(zero_reg, zero_reg);
  if (std::is_same<double, src_type>::value) {  // f64
    __ Ucomisd(src, zero_reg);
  } else {  // f32
    __ Ucomiss(src, zero_reg);
  }
  // NaN (unordered) and negative inputs both produce 0.
  __ j(parity_even, &neg_or_nan);
  __ j(below, &neg_or_nan);
  if (std::is_same<double, src_type>::value) {  // f64
    __ Cvttsd2uiq(dst, src, &overflow);
  } else {  // f32
    __ Cvttss2uiq(dst, src, &overflow);
  }
  __ jmp(&done);

  __ bind(&neg_or_nan);
  __ movq(dst, zero_reg);
  __ jmp(&done);

  __ bind(&overflow);
  __ movq(dst, Immediate64(std::numeric_limits<uint64_t>::max()));
  __ bind(&done);
  return true;
}
#undef __
}  // namespace liftoff

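// Illustrative note: the unsigned 64-bit variant also saturates: NaN and
// negative inputs produce 0, and inputs of 2^64 or more take the {overflow}
// path of Cvttsd2uiq/Cvttss2uiq and produce UINT64_MAX.
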
bool LiftoffAssembler::emit_type_conversion(WasmOpcode opcode,
                                            LiftoffRegister dst,
                                            LiftoffRegister src, Label* trap) {
  switch (opcode) {
    case kExprI32ConvertI64:
      movl(dst.gp(), src.gp());
      return true;
    case kExprI32SConvertF32:
      return liftoff::EmitTruncateFloatToInt<int32_t, float>(this, dst.gp(),
                                                             src.fp(), trap);
    case kExprI32UConvertF32:
      return liftoff::EmitTruncateFloatToInt<uint32_t, float>(this, dst.gp(),
                                                              src.fp(), trap);
    case kExprI32SConvertF64:
      return liftoff::EmitTruncateFloatToInt<int32_t, double>(this, dst.gp(),
                                                              src.fp(), trap);
    case kExprI32UConvertF64:
      return liftoff::EmitTruncateFloatToInt<uint32_t, double>(this, dst.gp(),
                                                               src.fp(), trap);
    case kExprI32SConvertSatF32:
      return liftoff::EmitSatTruncateFloatToInt<int32_t, float>(this, dst.gp(),
                                                                src.fp());
    case kExprI32UConvertSatF32:
      return liftoff::EmitSatTruncateFloatToInt<uint32_t, float>(this, dst.gp(),
                                                                 src.fp());
    case kExprI32SConvertSatF64:
      return liftoff::EmitSatTruncateFloatToInt<int32_t, double>(this, dst.gp(),
                                                                 src.fp());
    case kExprI32UConvertSatF64:
      return liftoff::EmitSatTruncateFloatToInt<uint32_t, double>(
          this, dst.gp(), src.fp());
    case kExprI32ReinterpretF32:
      Movd(dst.gp(), src.fp());
      return true;
    case kExprI64SConvertI32:
      movsxlq(dst.gp(), src.gp());
      return true;
    case kExprI64SConvertF32:
      return liftoff::EmitTruncateFloatToInt<int64_t, float>(this, dst.gp(),
                                                             src.fp(), trap);
    case kExprI64UConvertF32: {
      RETURN_FALSE_IF_MISSING_CPU_FEATURE(SSE4_1);
      Cvttss2uiq(dst.gp(), src.fp(), trap);
      return true;
    }
    case kExprI64SConvertF64:
      return liftoff::EmitTruncateFloatToInt<int64_t, double>(this, dst.gp(),
                                                              src.fp(), trap);
    case kExprI64UConvertF64: {
      RETURN_FALSE_IF_MISSING_CPU_FEATURE(SSE4_1);
      Cvttsd2uiq(dst.gp(), src.fp(), trap);
      return true;
    }
    case kExprI64SConvertSatF32:
      return liftoff::EmitSatTruncateFloatToInt<int64_t, float>(this, dst.gp(),
                                                                src.fp());
    case kExprI64UConvertSatF32: {
      return liftoff::EmitSatTruncateFloatToUInt64<float>(this, dst.gp(),
                                                          src.fp());
    }
    case kExprI64SConvertSatF64:
      return liftoff::EmitSatTruncateFloatToInt<int64_t, double>(this, dst.gp(),
                                                                 src.fp());
    case kExprI64UConvertSatF64: {
      return liftoff::EmitSatTruncateFloatToUInt64<double>(this, dst.gp(),
                                                           src.fp());
    }
    case kExprI64UConvertI32:
      AssertZeroExtended(src.gp());
      if (dst.gp() != src.gp()) movl(dst.gp(), src.gp());
      return true;
    case kExprI64ReinterpretF64:
      Movq(dst.gp(), src.fp());
      return true;
    case kExprF32SConvertI32:
      Cvtlsi2ss(dst.fp(), src.gp());
      return true;
    case kExprF32UConvertI32:
      movl(kScratchRegister, src.gp());
      Cvtqsi2ss(dst.fp(), kScratchRegister);
      return true;
    case kExprF32SConvertI64:
      Cvtqsi2ss(dst.fp(), src.gp());
      return true;
    case kExprF32UConvertI64:
      Cvtqui2ss(dst.fp(), src.gp());
      return true;
    case kExprF32ConvertF64:
      Cvtsd2ss(dst.fp(), src.fp());
      return true;
    case kExprF32ReinterpretI32:
      Movd(dst.fp(), src.gp());
      return true;
    case kExprF64SConvertI32:
      Cvtlsi2sd(dst.fp(), src.gp());
      return true;
    case kExprF64UConvertI32:
      movl(kScratchRegister, src.gp());
      Cvtqsi2sd(dst.fp(), kScratchRegister);
      return true;
    case kExprF64SConvertI64:
      Cvtqsi2sd(dst.fp(), src.gp());
      return true;
    case kExprF64UConvertI64:
      Cvtqui2sd(dst.fp(), src.gp());
      return true;
    case kExprF64ConvertF32:
      Cvtss2sd(dst.fp(), src.fp());
      return true;
    case kExprF64ReinterpretI64:
      Movq(dst.fp(), src.gp());
      return true;
    default:
      UNREACHABLE();
  }
}

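// Illustrative note: kExprF32UConvertI32/kExprF64UConvertI32 have no direct
// unsigned conversion instruction. movl zero-extends the u32 into a 64-bit
// register whose value is always non-negative, so the signed 64-bit
// Cvtqsi2ss/Cvtqsi2sd then produces the exact unsigned result.
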
void LiftoffAssembler::emit_i32_signextend_i8(Register dst, Register src) {
  movsxbl(dst, src);
}

void LiftoffAssembler::emit_i32_signextend_i16(Register dst, Register src) {
  movsxwl(dst, src);
}

void LiftoffAssembler::emit_i64_signextend_i8(LiftoffRegister dst,
                                              LiftoffRegister src) {
  movsxbq(dst.gp(), src.gp());
}

void LiftoffAssembler::emit_i64_signextend_i16(LiftoffRegister dst,
                                               LiftoffRegister src) {
  movsxwq(dst.gp(), src.gp());
}

void LiftoffAssembler::emit_i64_signextend_i32(LiftoffRegister dst,
                                               LiftoffRegister src) {
  movsxlq(dst.gp(), src.gp());
}

void LiftoffAssembler::emit_jump(Label* label) { jmp(label); }

void LiftoffAssembler::emit_jump(Register target) { jmp(target); }

void LiftoffAssembler::emit_cond_jump(LiftoffCondition liftoff_cond,
                                      Label* label, ValueKind kind,
                                      Register lhs, Register rhs) {
  Condition cond = liftoff::ToCondition(liftoff_cond);
  if (rhs != no_reg) {
    switch (kind) {
      case kI32:
        cmpl(lhs, rhs);
        break;
      case kRef:
      case kOptRef:
      case kRtt:
      case kRttWithDepth:
        DCHECK(liftoff_cond == kEqual || liftoff_cond == kUnequal);
        V8_FALLTHROUGH;
      case kI64:
        cmpq(lhs, rhs);
        break;
      default:
        UNREACHABLE();
    }
  } else {
    DCHECK_EQ(kind, kI32);
    testl(lhs, lhs);
  }

  j(cond, label);
}

void LiftoffAssembler::emit_i32_cond_jumpi(LiftoffCondition liftoff_cond,
                                           Label* label, Register lhs,
                                           int imm) {
  Condition cond = liftoff::ToCondition(liftoff_cond);
  cmpl(lhs, Immediate(imm));
  j(cond, label);
}

void LiftoffAssembler::emit_i32_eqz(Register dst, Register src) {
  testl(src, src);
  setcc(equal, dst);
  movzxbl(dst, dst);
}

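// Illustrative note: setcc writes only the low 8 bits of its destination, so
// the movzxbl afterwards zero-extends it into a full 0/1 value, as expected
// for wasm's i32 comparison results.
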
void LiftoffAssembler::emit_i32_set_cond(LiftoffCondition liftoff_cond,
                                         Register dst, Register lhs,
                                         Register rhs) {
  Condition cond = liftoff::ToCondition(liftoff_cond);
  cmpl(lhs, rhs);
  setcc(cond, dst);
  movzxbl(dst, dst);
}

void LiftoffAssembler::emit_i64_eqz(Register dst, LiftoffRegister src) {
  testq(src.gp(), src.gp());
  setcc(equal, dst);
  movzxbl(dst, dst);
}

void LiftoffAssembler::emit_i64_set_cond(LiftoffCondition liftoff_cond,
                                         Register dst, LiftoffRegister lhs,
                                         LiftoffRegister rhs) {
  Condition cond = liftoff::ToCondition(liftoff_cond);
  cmpq(lhs.gp(), rhs.gp());
  setcc(cond, dst);
  movzxbl(dst, dst);
}

namespace liftoff {
template <void (SharedTurboAssembler::*cmp_op)(DoubleRegister, DoubleRegister)>
void EmitFloatSetCond(LiftoffAssembler* assm, Condition cond, Register dst,
                      DoubleRegister lhs, DoubleRegister rhs) {
  Label cont;
  Label not_nan;

  (assm->*cmp_op)(lhs, rhs);
  // If PF is one, one of the operands was NaN. This needs special handling.
  assm->j(parity_odd, &not_nan, Label::kNear);
  // Return 1 for f32.ne/f64.ne, 0 for all other cases.
  if (cond == not_equal) {
    assm->movl(dst, Immediate(1));
  } else {
    assm->xorl(dst, dst);
  }
  assm->jmp(&cont, Label::kNear);
  assm->bind(&not_nan);

  assm->setcc(cond, dst);
  assm->movzxbl(dst, dst);
  assm->bind(&cont);
}
}  // namespace liftoff

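// Illustrative note: per IEEE 754 (and thus the wasm spec), every comparison
// involving a NaN operand is false except "not equal", which is true; the
// NaN branch above encodes exactly that before the regular setcc path.
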
void LiftoffAssembler::emit_f32_set_cond(LiftoffCondition liftoff_cond,
                                         Register dst, DoubleRegister lhs,
                                         DoubleRegister rhs) {
  Condition cond = liftoff::ToCondition(liftoff_cond);
  liftoff::EmitFloatSetCond<&TurboAssembler::Ucomiss>(this, cond, dst, lhs,
                                                      rhs);
}

void LiftoffAssembler::emit_f64_set_cond(LiftoffCondition liftoff_cond,
                                         Register dst, DoubleRegister lhs,
                                         DoubleRegister rhs) {
  Condition cond = liftoff::ToCondition(liftoff_cond);
  liftoff::EmitFloatSetCond<&TurboAssembler::Ucomisd>(this, cond, dst, lhs,
                                                      rhs);
}

bool LiftoffAssembler::emit_select(LiftoffRegister dst, Register condition,
                                   LiftoffRegister true_value,
                                   LiftoffRegister false_value,
                                   ValueKind kind) {
  if (kind != kI32 && kind != kI64) return false;

  testl(condition, condition);

  if (kind == kI32) {
    if (dst == false_value) {
      cmovl(not_zero, dst.gp(), true_value.gp());
    } else {
      if (dst != true_value) movl(dst.gp(), true_value.gp());
      cmovl(zero, dst.gp(), false_value.gp());
    }
  } else {
    if (dst == false_value) {
      cmovq(not_zero, dst.gp(), true_value.gp());
    } else {
      if (dst != true_value) movq(dst.gp(), true_value.gp());
      cmovq(zero, dst.gp(), false_value.gp());
    }
  }

  return true;
}

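// Illustrative note: testl sets ZF from {condition}, and the cmov then makes
// the select branch-free: with dst == false_value, cmov(not_zero) overwrites
// dst with true_value only for a non-zero condition; otherwise true_value is
// moved in first and cmov(zero) overwrites it with false_value.
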
void LiftoffAssembler::emit_smi_check(Register obj, Label* target,
                                      SmiCheckMode mode) {
  testb(obj, Immediate(kSmiTagMask));
  Condition condition = mode == kJumpOnSmi ? zero : not_zero;
  j(condition, target);
}

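// Illustrative note: V8 tags Smis with a zero low bit, so testing {obj}
// against kSmiTagMask sets ZF exactly when the object is a Smi; {mode} then
// selects which way to jump.
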
// TODO(fanchenk): Distinguish mov* if data bypass delay matters.
namespace liftoff {
template <void (Assembler::*avx_op)(XMMRegister, XMMRegister, XMMRegister),
          void (Assembler::*sse_op)(XMMRegister, XMMRegister)>
void EmitSimdCommutativeBinOp(
    LiftoffAssembler* assm, LiftoffRegister dst, LiftoffRegister lhs,
    LiftoffRegister rhs, base::Optional<CpuFeature> feature = base::nullopt) {
  if (CpuFeatures::IsSupported(AVX)) {
    CpuFeatureScope scope(assm, AVX);
    (assm->*avx_op)(dst.fp(), lhs.fp(), rhs.fp());
    return;
  }

  base::Optional<CpuFeatureScope> sse_scope;
  if (feature.has_value()) sse_scope.emplace(assm, *feature);

  if (dst.fp() == rhs.fp()) {
    (assm->*sse_op)(dst.fp(), lhs.fp());
  } else {
    if (dst.fp() != lhs.fp()) assm->movaps(dst.fp(), lhs.fp());
    (assm->*sse_op)(dst.fp(), rhs.fp());
  }
}

template <void (Assembler::*avx_op)(XMMRegister, XMMRegister, XMMRegister),
          void (Assembler::*sse_op)(XMMRegister, XMMRegister)>
void EmitSimdNonCommutativeBinOp(
    LiftoffAssembler* assm, LiftoffRegister dst, LiftoffRegister lhs,
    LiftoffRegister rhs, base::Optional<CpuFeature> feature = base::nullopt) {
  if (CpuFeatures::IsSupported(AVX)) {
    CpuFeatureScope scope(assm, AVX);
    (assm->*avx_op)(dst.fp(), lhs.fp(), rhs.fp());
    return;
  }

  base::Optional<CpuFeatureScope> sse_scope;
  if (feature.has_value()) sse_scope.emplace(assm, *feature);

  if (dst.fp() == rhs.fp()) {
    assm->movaps(kScratchDoubleReg, rhs.fp());
    assm->movaps(dst.fp(), lhs.fp());
    (assm->*sse_op)(dst.fp(), kScratchDoubleReg);
  } else {
    if (dst.fp() != lhs.fp()) assm->movaps(dst.fp(), lhs.fp());
    (assm->*sse_op)(dst.fp(), rhs.fp());
  }
}

template <void (Assembler::*avx_op)(XMMRegister, XMMRegister, XMMRegister),
          void (Assembler::*sse_op)(XMMRegister, XMMRegister), uint8_t width>
void EmitSimdShiftOp(LiftoffAssembler* assm, LiftoffRegister dst,
                     LiftoffRegister operand, LiftoffRegister count) {
  constexpr int mask = (1 << width) - 1;
  assm->movq(kScratchRegister, count.gp());
  assm->andq(kScratchRegister, Immediate(mask));
  assm->Movq(kScratchDoubleReg, kScratchRegister);
  if (CpuFeatures::IsSupported(AVX)) {
    CpuFeatureScope scope(assm, AVX);
    (assm->*avx_op)(dst.fp(), operand.fp(), kScratchDoubleReg);
  } else {
    if (dst.fp() != operand.fp()) assm->movaps(dst.fp(), operand.fp());
    (assm->*sse_op)(dst.fp(), kScratchDoubleReg);
  }
}

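// Illustrative note: the SSE/AVX vector shifts take their count from the low
// 64 bits of an XMM register and do not mask it, so the count is masked to
// the lane width in a GP register first. E.g. an i32x4 shift by 37 becomes a
// shift by 5, matching wasm semantics.
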
template <void (Assembler::*avx_op)(XMMRegister, XMMRegister, byte),
          void (Assembler::*sse_op)(XMMRegister, byte), uint8_t width>
void EmitSimdShiftOpImm(LiftoffAssembler* assm, LiftoffRegister dst,
                        LiftoffRegister operand, int32_t count) {
  constexpr int mask = (1 << width) - 1;
  byte shift = static_cast<byte>(count & mask);
  if (CpuFeatures::IsSupported(AVX)) {
    CpuFeatureScope scope(assm, AVX);
    (assm->*avx_op)(dst.fp(), operand.fp(), shift);
  } else {
    if (dst.fp() != operand.fp()) assm->movaps(dst.fp(), operand.fp());
    (assm->*sse_op)(dst.fp(), shift);
  }
}

inline void EmitAnyTrue(LiftoffAssembler* assm, LiftoffRegister dst,
                        LiftoffRegister src) {
  assm->xorq(dst.gp(), dst.gp());
  assm->Ptest(src.fp(), src.fp());
  assm->setcc(not_equal, dst.gp());
}

template <void (SharedTurboAssembler::*pcmp)(XMMRegister, XMMRegister)>
inline void EmitAllTrue(LiftoffAssembler* assm, LiftoffRegister dst,
                        LiftoffRegister src,
                        base::Optional<CpuFeature> feature = base::nullopt) {
  base::Optional<CpuFeatureScope> sse_scope;
  if (feature.has_value()) sse_scope.emplace(assm, *feature);

  XMMRegister tmp = kScratchDoubleReg;
  assm->xorq(dst.gp(), dst.gp());
  assm->Pxor(tmp, tmp);
  (assm->*pcmp)(tmp, src.fp());
  assm->Ptest(tmp, tmp);
  assm->setcc(equal, dst.gp());
}

}  // namespace liftoff

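// Illustrative note: EmitAnyTrue uses ptest(src, src), which sets ZF iff the
// whole vector is zero, so setcc(not_equal) yields 1 when any bit is set.
// EmitAllTrue compares every lane against zero (zero lanes become all-ones
// in tmp) and ptests that mask: ZF survives only if no lane was zero, i.e.
// all lanes were true.
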
void LiftoffAssembler::LoadTransform(LiftoffRegister dst, Register src_addr,
                                     Register offset_reg, uintptr_t offset_imm,
                                     LoadType type,
                                     LoadTransformationKind transform,
                                     uint32_t* protected_load_pc) {
  Operand src_op = liftoff::GetMemOp(this, src_addr, offset_reg, offset_imm);
  *protected_load_pc = pc_offset();
  MachineType memtype = type.mem_type();
  if (transform == LoadTransformationKind::kExtend) {
    if (memtype == MachineType::Int8()) {
      Pmovsxbw(dst.fp(), src_op);
    } else if (memtype == MachineType::Uint8()) {
      Pmovzxbw(dst.fp(), src_op);
    } else if (memtype == MachineType::Int16()) {
      Pmovsxwd(dst.fp(), src_op);
    } else if (memtype == MachineType::Uint16()) {
      Pmovzxwd(dst.fp(), src_op);
    } else if (memtype == MachineType::Int32()) {
      Pmovsxdq(dst.fp(), src_op);
    } else if (memtype == MachineType::Uint32()) {
      Pmovzxdq(dst.fp(), src_op);
    }
  } else if (transform == LoadTransformationKind::kZeroExtend) {
    if (memtype == MachineType::Int32()) {
      Movss(dst.fp(), src_op);
    } else {
      DCHECK_EQ(MachineType::Int64(), memtype);
      Movsd(dst.fp(), src_op);
    }
  } else {
    DCHECK_EQ(LoadTransformationKind::kSplat, transform);
    if (memtype == MachineType::Int8()) {
      S128Load8Splat(dst.fp(), src_op, kScratchDoubleReg);
    } else if (memtype == MachineType::Int16()) {
      S128Load16Splat(dst.fp(), src_op, kScratchDoubleReg);
    } else if (memtype == MachineType::Int32()) {
      S128Load32Splat(dst.fp(), src_op);
    } else if (memtype == MachineType::Int64()) {
      Movddup(dst.fp(), src_op);
    }
  }
}

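// Illustrative note: the kExtend transforms load 64 bits and widen each
// lane, e.g. Pmovsxbw sign-extends eight i8 lanes into eight i16 lanes;
// kZeroExtend loads a single 32/64-bit value into the low lane and zeroes
// the rest of the register.
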
void LiftoffAssembler::LoadLane(LiftoffRegister dst, LiftoffRegister src,
                                Register addr, Register offset_reg,
                                uintptr_t offset_imm, LoadType type,
                                uint8_t laneidx, uint32_t* protected_load_pc) {
  Operand src_op = liftoff::GetMemOp(this, addr, offset_reg, offset_imm);

  MachineType mem_type = type.mem_type();
  if (mem_type == MachineType::Int8()) {
    Pinsrb(dst.fp(), src.fp(), src_op, laneidx, protected_load_pc);
  } else if (mem_type == MachineType::Int16()) {
    Pinsrw(dst.fp(), src.fp(), src_op, laneidx, protected_load_pc);
  } else if (mem_type == MachineType::Int32()) {
    Pinsrd(dst.fp(), src.fp(), src_op, laneidx, protected_load_pc);
  } else {
    DCHECK_EQ(MachineType::Int64(), mem_type);
    Pinsrq(dst.fp(), src.fp(), src_op, laneidx, protected_load_pc);
  }
}

void LiftoffAssembler::StoreLane(Register dst, Register offset,
                                 uintptr_t offset_imm, LiftoffRegister src,
                                 StoreType type, uint8_t lane,
                                 uint32_t* protected_store_pc) {
  Operand dst_op = liftoff::GetMemOp(this, dst, offset, offset_imm);
  if (protected_store_pc) *protected_store_pc = pc_offset();
  MachineRepresentation rep = type.mem_rep();
  if (rep == MachineRepresentation::kWord8) {
    Pextrb(dst_op, src.fp(), lane);
  } else if (rep == MachineRepresentation::kWord16) {
    Pextrw(dst_op, src.fp(), lane);
  } else if (rep == MachineRepresentation::kWord32) {
    S128Store32Lane(dst_op, src.fp(), lane);
  } else {
    DCHECK_EQ(MachineRepresentation::kWord64, rep);
    S128Store64Lane(dst_op, src.fp(), lane);
  }
}

void LiftoffAssembler::emit_i8x16_shuffle(LiftoffRegister dst,
                                          LiftoffRegister lhs,
                                          LiftoffRegister rhs,
                                          const uint8_t shuffle[16],
                                          bool is_swizzle) {
  if (is_swizzle) {
    uint32_t imms[4];
    // Shuffles that use just one operand are called swizzles; rhs can be
    // ignored.
    wasm::SimdShuffle::Pack16Lanes(imms, shuffle);
    TurboAssembler::Move(kScratchDoubleReg, make_uint64(imms[3], imms[2]),
                         make_uint64(imms[1], imms[0]));
    Pshufb(dst.fp(), lhs.fp(), kScratchDoubleReg);
    return;
  }

  uint64_t mask1[2] = {};
  for (int i = 15; i >= 0; i--) {
    uint8_t lane = shuffle[i];
    int j = i >> 3;
    mask1[j] <<= 8;
    mask1[j] |= lane < kSimd128Size ? lane : 0x80;
  }
  TurboAssembler::Move(liftoff::kScratchDoubleReg2, mask1[1], mask1[0]);
  Pshufb(kScratchDoubleReg, lhs.fp(), liftoff::kScratchDoubleReg2);

  uint64_t mask2[2] = {};
  for (int i = 15; i >= 0; i--) {
    uint8_t lane = shuffle[i];
    int j = i >> 3;
    mask2[j] <<= 8;
    mask2[j] |= lane >= kSimd128Size ? (lane & 0x0F) : 0x80;
  }
  TurboAssembler::Move(liftoff::kScratchDoubleReg2, mask2[1], mask2[0]);

  Pshufb(dst.fp(), rhs.fp(), liftoff::kScratchDoubleReg2);
  Por(dst.fp(), kScratchDoubleReg);
}

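// Illustrative note: pshufb copies the selected source byte when the top bit
// of the mask byte is clear and writes 0 when it is set (0x80). The two
// masks built above therefore select the lhs lanes and the rhs lanes
// respectively, and the final Por merges the two half-shuffles into the full
// two-operand shuffle.
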
void LiftoffAssembler::emit_i8x16_swizzle(LiftoffRegister dst,
                                          LiftoffRegister lhs,
                                          LiftoffRegister rhs) {
  I8x16Swizzle(dst.fp(), lhs.fp(), rhs.fp(), kScratchDoubleReg,
               kScratchRegister);
}

void LiftoffAssembler::emit_i8x16_popcnt(LiftoffRegister dst,
                                         LiftoffRegister src) {
  I8x16Popcnt(dst.fp(), src.fp(), kScratchDoubleReg,
              liftoff::kScratchDoubleReg2, kScratchRegister);
}

void LiftoffAssembler::emit_i8x16_splat(LiftoffRegister dst,
                                        LiftoffRegister src) {
  I8x16Splat(dst.fp(), src.gp(), kScratchDoubleReg);
}

void LiftoffAssembler::emit_i16x8_splat(LiftoffRegister dst,
                                        LiftoffRegister src) {
  I16x8Splat(dst.fp(), src.gp());
}

void LiftoffAssembler::emit_i32x4_splat(LiftoffRegister dst,
                                        LiftoffRegister src) {
  Movd(dst.fp(), src.gp());
  Pshufd(dst.fp(), dst.fp(), static_cast<uint8_t>(0));
}

void LiftoffAssembler::emit_i64x2_splat(LiftoffRegister dst,
                                        LiftoffRegister src) {
  Movq(dst.fp(), src.gp());
  Movddup(dst.fp(), dst.fp());
}

void LiftoffAssembler::emit_f32x4_splat(LiftoffRegister dst,
                                        LiftoffRegister src) {
  F32x4Splat(dst.fp(), src.fp());
}

void LiftoffAssembler::emit_f64x2_splat(LiftoffRegister dst,
                                        LiftoffRegister src) {
  Movddup(dst.fp(), src.fp());
}

void LiftoffAssembler::emit_i8x16_eq(LiftoffRegister dst, LiftoffRegister lhs,
                                     LiftoffRegister rhs) {
  liftoff::EmitSimdCommutativeBinOp<&Assembler::vpcmpeqb, &Assembler::pcmpeqb>(
      this, dst, lhs, rhs);
}

void LiftoffAssembler::emit_i8x16_ne(LiftoffRegister dst, LiftoffRegister lhs,
                                     LiftoffRegister rhs) {
  liftoff::EmitSimdCommutativeBinOp<&Assembler::vpcmpeqb, &Assembler::pcmpeqb>(
      this, dst, lhs, rhs);
  Pcmpeqb(kScratchDoubleReg, kScratchDoubleReg);
  Pxor(dst.fp(), kScratchDoubleReg);
}

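// Illustrative note: SSE/AVX have no integer "compare not equal", so ne is
// computed as eq followed by a bitwise NOT: pcmpeqb(reg, reg) produces
// all-ones, and the pxor flips every bit of the comparison result.
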
void LiftoffAssembler::emit_i8x16_gt_s(LiftoffRegister dst, LiftoffRegister lhs,
                                       LiftoffRegister rhs) {
  liftoff::EmitSimdNonCommutativeBinOp<&Assembler::vpcmpgtb,
                                       &Assembler::pcmpgtb>(this, dst, lhs,
                                                            rhs);
}

void LiftoffAssembler::emit_i8x16_gt_u(LiftoffRegister dst, LiftoffRegister lhs,
                                       LiftoffRegister rhs) {
  DoubleRegister ref = rhs.fp();
  if (dst == rhs) {
    Movaps(kScratchDoubleReg, rhs.fp());
    ref = kScratchDoubleReg;
  }
  liftoff::EmitSimdCommutativeBinOp<&Assembler::vpmaxub, &Assembler::pmaxub>(
      this, dst, lhs, rhs);
  Pcmpeqb(dst.fp(), ref);
  Pcmpeqb(kScratchDoubleReg, kScratchDoubleReg);
  Pxor(dst.fp(), kScratchDoubleReg);
}

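// Illustrative note: SSE only provides signed greater-than for integers, so
// the unsigned compares are built from min/max: max_u(lhs, rhs) == rhs iff
// lhs <= rhs, which Pcmpeq turns into a mask and the final Pxor inverts to
// lhs > rhs. The ge variants below use the analogous min identity without
// the inversion. pminsb, pminuw and pmaxuw are SSE4.1 instructions, hence
// the extra CpuFeature argument on those paths.
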
emit_i8x16_ge_s(LiftoffRegister dst,LiftoffRegister lhs,LiftoffRegister rhs)2571 void LiftoffAssembler::emit_i8x16_ge_s(LiftoffRegister dst, LiftoffRegister lhs,
2572                                        LiftoffRegister rhs) {
2573   DoubleRegister ref = rhs.fp();
2574   if (dst == rhs) {
2575     Movaps(kScratchDoubleReg, rhs.fp());
2576     ref = kScratchDoubleReg;
2577   }
2578   liftoff::EmitSimdCommutativeBinOp<&Assembler::vpminsb, &Assembler::pminsb>(
2579       this, dst, lhs, rhs, SSE4_1);
2580   Pcmpeqb(dst.fp(), ref);
2581 }
2582 
emit_i8x16_ge_u(LiftoffRegister dst,LiftoffRegister lhs,LiftoffRegister rhs)2583 void LiftoffAssembler::emit_i8x16_ge_u(LiftoffRegister dst, LiftoffRegister lhs,
2584                                        LiftoffRegister rhs) {
2585   DoubleRegister ref = rhs.fp();
2586   if (dst == rhs) {
2587     Movaps(kScratchDoubleReg, rhs.fp());
2588     ref = kScratchDoubleReg;
2589   }
2590   liftoff::EmitSimdCommutativeBinOp<&Assembler::vpminub, &Assembler::pminub>(
2591       this, dst, lhs, rhs);
2592   Pcmpeqb(dst.fp(), ref);
2593 }
2594 
emit_i16x8_eq(LiftoffRegister dst,LiftoffRegister lhs,LiftoffRegister rhs)2595 void LiftoffAssembler::emit_i16x8_eq(LiftoffRegister dst, LiftoffRegister lhs,
2596                                      LiftoffRegister rhs) {
2597   liftoff::EmitSimdCommutativeBinOp<&Assembler::vpcmpeqw, &Assembler::pcmpeqw>(
2598       this, dst, lhs, rhs);
2599 }
2600 
emit_i16x8_ne(LiftoffRegister dst,LiftoffRegister lhs,LiftoffRegister rhs)2601 void LiftoffAssembler::emit_i16x8_ne(LiftoffRegister dst, LiftoffRegister lhs,
2602                                      LiftoffRegister rhs) {
2603   liftoff::EmitSimdCommutativeBinOp<&Assembler::vpcmpeqw, &Assembler::pcmpeqw>(
2604       this, dst, lhs, rhs);
2605   Pcmpeqw(kScratchDoubleReg, kScratchDoubleReg);
2606   Pxor(dst.fp(), kScratchDoubleReg);
2607 }
2608 
emit_i16x8_gt_s(LiftoffRegister dst,LiftoffRegister lhs,LiftoffRegister rhs)2609 void LiftoffAssembler::emit_i16x8_gt_s(LiftoffRegister dst, LiftoffRegister lhs,
2610                                        LiftoffRegister rhs) {
2611   liftoff::EmitSimdNonCommutativeBinOp<&Assembler::vpcmpgtw,
2612                                        &Assembler::pcmpgtw>(this, dst, lhs,
2613                                                             rhs);
2614 }
2615 
void LiftoffAssembler::emit_i16x8_gt_u(LiftoffRegister dst, LiftoffRegister lhs,
                                       LiftoffRegister rhs) {
  DoubleRegister ref = rhs.fp();
  if (dst == rhs) {
    Movaps(kScratchDoubleReg, rhs.fp());
    ref = kScratchDoubleReg;
  }
  liftoff::EmitSimdCommutativeBinOp<&Assembler::vpmaxuw, &Assembler::pmaxuw>(
      this, dst, lhs, rhs, SSE4_1);
  Pcmpeqw(dst.fp(), ref);
  Pcmpeqw(kScratchDoubleReg, kScratchDoubleReg);
  Pxor(dst.fp(), kScratchDoubleReg);
}

void LiftoffAssembler::emit_i16x8_ge_s(LiftoffRegister dst, LiftoffRegister lhs,
                                       LiftoffRegister rhs) {
  DoubleRegister ref = rhs.fp();
  if (dst == rhs) {
    Movaps(kScratchDoubleReg, rhs.fp());
    ref = kScratchDoubleReg;
  }
  liftoff::EmitSimdCommutativeBinOp<&Assembler::vpminsw, &Assembler::pminsw>(
      this, dst, lhs, rhs);
  Pcmpeqw(dst.fp(), ref);
}

void LiftoffAssembler::emit_i16x8_ge_u(LiftoffRegister dst, LiftoffRegister lhs,
                                       LiftoffRegister rhs) {
  DoubleRegister ref = rhs.fp();
  if (dst == rhs) {
    Movaps(kScratchDoubleReg, rhs.fp());
    ref = kScratchDoubleReg;
  }
  liftoff::EmitSimdCommutativeBinOp<&Assembler::vpminuw, &Assembler::pminuw>(
      this, dst, lhs, rhs, SSE4_1);
  Pcmpeqw(dst.fp(), ref);
}

void LiftoffAssembler::emit_i32x4_eq(LiftoffRegister dst, LiftoffRegister lhs,
                                     LiftoffRegister rhs) {
  liftoff::EmitSimdCommutativeBinOp<&Assembler::vpcmpeqd, &Assembler::pcmpeqd>(
      this, dst, lhs, rhs);
}

void LiftoffAssembler::emit_i32x4_ne(LiftoffRegister dst, LiftoffRegister lhs,
                                     LiftoffRegister rhs) {
  liftoff::EmitSimdCommutativeBinOp<&Assembler::vpcmpeqd, &Assembler::pcmpeqd>(
      this, dst, lhs, rhs);
  Pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
  Pxor(dst.fp(), kScratchDoubleReg);
}

void LiftoffAssembler::emit_i32x4_gt_s(LiftoffRegister dst, LiftoffRegister lhs,
                                       LiftoffRegister rhs) {
  liftoff::EmitSimdNonCommutativeBinOp<&Assembler::vpcmpgtd,
                                       &Assembler::pcmpgtd>(this, dst, lhs,
                                                            rhs);
}

void LiftoffAssembler::emit_i32x4_gt_u(LiftoffRegister dst, LiftoffRegister lhs,
                                       LiftoffRegister rhs) {
  DoubleRegister ref = rhs.fp();
  if (dst == rhs) {
    Movaps(kScratchDoubleReg, rhs.fp());
    ref = kScratchDoubleReg;
  }
  liftoff::EmitSimdCommutativeBinOp<&Assembler::vpmaxud, &Assembler::pmaxud>(
      this, dst, lhs, rhs, SSE4_1);
  Pcmpeqd(dst.fp(), ref);
  Pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
  Pxor(dst.fp(), kScratchDoubleReg);
}

void LiftoffAssembler::emit_i32x4_ge_s(LiftoffRegister dst, LiftoffRegister lhs,
                                       LiftoffRegister rhs) {
  DoubleRegister ref = rhs.fp();
  if (dst == rhs) {
    Movaps(kScratchDoubleReg, rhs.fp());
    ref = kScratchDoubleReg;
  }
  liftoff::EmitSimdCommutativeBinOp<&Assembler::vpminsd, &Assembler::pminsd>(
      this, dst, lhs, rhs, SSE4_1);
  Pcmpeqd(dst.fp(), ref);
}

void LiftoffAssembler::emit_i32x4_ge_u(LiftoffRegister dst, LiftoffRegister lhs,
                                       LiftoffRegister rhs) {
  DoubleRegister ref = rhs.fp();
  if (dst == rhs) {
    Movaps(kScratchDoubleReg, rhs.fp());
    ref = kScratchDoubleReg;
  }
  liftoff::EmitSimdCommutativeBinOp<&Assembler::vpminud, &Assembler::pminud>(
      this, dst, lhs, rhs, SSE4_1);
  Pcmpeqd(dst.fp(), ref);
}

void LiftoffAssembler::emit_i64x2_eq(LiftoffRegister dst, LiftoffRegister lhs,
                                     LiftoffRegister rhs) {
  liftoff::EmitSimdCommutativeBinOp<&Assembler::vpcmpeqq, &Assembler::pcmpeqq>(
      this, dst, lhs, rhs, SSE4_1);
}

void LiftoffAssembler::emit_i64x2_ne(LiftoffRegister dst, LiftoffRegister lhs,
                                     LiftoffRegister rhs) {
  liftoff::EmitSimdCommutativeBinOp<&Assembler::vpcmpeqq, &Assembler::pcmpeqq>(
      this, dst, lhs, rhs, SSE4_1);
  Pcmpeqq(kScratchDoubleReg, kScratchDoubleReg);
  Pxor(dst.fp(), kScratchDoubleReg);
}

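// pcmpgtq, the only packed 64-bit signed compare, is an SSE4_2 instruction;
// without it I64x2GtS/I64x2GeS presumably fall back to a multi-instruction
// sequence whose destination must not alias an input, hence the extra copies
// through kScratchDoubleReg2 below.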
void LiftoffAssembler::emit_i64x2_gt_s(LiftoffRegister dst, LiftoffRegister lhs,
                                       LiftoffRegister rhs) {
  // Different register alias requirements depending on CpuFeatures supported:
  if (CpuFeatures::IsSupported(AVX) || CpuFeatures::IsSupported(SSE4_2)) {
    // 1. AVX, or SSE4_2 no requirements (I64x2GtS takes care of aliasing).
    I64x2GtS(dst.fp(), lhs.fp(), rhs.fp(), kScratchDoubleReg);
  } else {
    // 2. Else, dst != lhs && dst != rhs (lhs == rhs is ok).
    if (dst == lhs || dst == rhs) {
      I64x2GtS(liftoff::kScratchDoubleReg2, lhs.fp(), rhs.fp(),
               kScratchDoubleReg);
      movaps(dst.fp(), liftoff::kScratchDoubleReg2);
    } else {
      I64x2GtS(dst.fp(), lhs.fp(), rhs.fp(), kScratchDoubleReg);
    }
  }
}

void LiftoffAssembler::emit_i64x2_ge_s(LiftoffRegister dst, LiftoffRegister lhs,
                                       LiftoffRegister rhs) {
  // Different register alias requirements depending on CpuFeatures supported:
  if (CpuFeatures::IsSupported(AVX)) {
    // 1. AVX, no requirements.
    I64x2GeS(dst.fp(), lhs.fp(), rhs.fp(), kScratchDoubleReg);
  } else if (CpuFeatures::IsSupported(SSE4_2)) {
    // 2. SSE4_2, dst != lhs.
    if (dst == lhs) {
      I64x2GeS(liftoff::kScratchDoubleReg2, lhs.fp(), rhs.fp(),
               kScratchDoubleReg);
      movaps(dst.fp(), liftoff::kScratchDoubleReg2);
    } else {
      I64x2GeS(dst.fp(), lhs.fp(), rhs.fp(), kScratchDoubleReg);
    }
  } else {
    // 3. Else, dst != lhs && dst != rhs (lhs == rhs is ok).
    if (dst == lhs || dst == rhs) {
      I64x2GeS(liftoff::kScratchDoubleReg2, lhs.fp(), rhs.fp(),
               kScratchDoubleReg);
      movaps(dst.fp(), liftoff::kScratchDoubleReg2);
    } else {
      I64x2GeS(dst.fp(), lhs.fp(), rhs.fp(), kScratchDoubleReg);
    }
  }
}

void LiftoffAssembler::emit_f32x4_eq(LiftoffRegister dst, LiftoffRegister lhs,
                                     LiftoffRegister rhs) {
  liftoff::EmitSimdCommutativeBinOp<&Assembler::vcmpeqps, &Assembler::cmpeqps>(
      this, dst, lhs, rhs);
}

void LiftoffAssembler::emit_f32x4_ne(LiftoffRegister dst, LiftoffRegister lhs,
                                     LiftoffRegister rhs) {
  liftoff::EmitSimdCommutativeBinOp<&Assembler::vcmpneqps,
                                    &Assembler::cmpneqps>(this, dst, lhs, rhs);
}

void LiftoffAssembler::emit_f32x4_lt(LiftoffRegister dst, LiftoffRegister lhs,
                                     LiftoffRegister rhs) {
  liftoff::EmitSimdNonCommutativeBinOp<&Assembler::vcmpltps,
                                       &Assembler::cmpltps>(this, dst, lhs,
                                                            rhs);
}

void LiftoffAssembler::emit_f32x4_le(LiftoffRegister dst, LiftoffRegister lhs,
                                     LiftoffRegister rhs) {
  liftoff::EmitSimdNonCommutativeBinOp<&Assembler::vcmpleps,
                                       &Assembler::cmpleps>(this, dst, lhs,
                                                            rhs);
}

void LiftoffAssembler::emit_f64x2_eq(LiftoffRegister dst, LiftoffRegister lhs,
                                     LiftoffRegister rhs) {
  liftoff::EmitSimdCommutativeBinOp<&Assembler::vcmpeqpd, &Assembler::cmpeqpd>(
      this, dst, lhs, rhs);
}

void LiftoffAssembler::emit_f64x2_ne(LiftoffRegister dst, LiftoffRegister lhs,
                                     LiftoffRegister rhs) {
  liftoff::EmitSimdCommutativeBinOp<&Assembler::vcmpneqpd,
                                    &Assembler::cmpneqpd>(this, dst, lhs, rhs);
}

void LiftoffAssembler::emit_f64x2_lt(LiftoffRegister dst, LiftoffRegister lhs,
                                     LiftoffRegister rhs) {
  liftoff::EmitSimdNonCommutativeBinOp<&Assembler::vcmpltpd,
                                       &Assembler::cmpltpd>(this, dst, lhs,
                                                            rhs);
}

void LiftoffAssembler::emit_f64x2_le(LiftoffRegister dst, LiftoffRegister lhs,
                                     LiftoffRegister rhs) {
  liftoff::EmitSimdNonCommutativeBinOp<&Assembler::vcmplepd,
                                       &Assembler::cmplepd>(this, dst, lhs,
                                                            rhs);
}

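// The 16 immediate bytes are reassembled into two uint64_t halves via memcpy
// (avoiding unaligned-access and strict-aliasing pitfalls); TurboAssembler::
// Move then materializes the 128-bit constant from those halves.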
void LiftoffAssembler::emit_s128_const(LiftoffRegister dst,
                                       const uint8_t imms[16]) {
  uint64_t vals[2];
  memcpy(vals, imms, sizeof(vals));
  TurboAssembler::Move(dst.fp(), vals[1], vals[0]);
}

void LiftoffAssembler::emit_s128_not(LiftoffRegister dst, LiftoffRegister src) {
  S128Not(dst.fp(), src.fp(), kScratchDoubleReg);
}

void LiftoffAssembler::emit_s128_and(LiftoffRegister dst, LiftoffRegister lhs,
                                     LiftoffRegister rhs) {
  liftoff::EmitSimdCommutativeBinOp<&Assembler::vpand, &Assembler::pand>(
      this, dst, lhs, rhs);
}

void LiftoffAssembler::emit_s128_or(LiftoffRegister dst, LiftoffRegister lhs,
                                    LiftoffRegister rhs) {
  liftoff::EmitSimdCommutativeBinOp<&Assembler::vpor, &Assembler::por>(
      this, dst, lhs, rhs);
}

void LiftoffAssembler::emit_s128_xor(LiftoffRegister dst, LiftoffRegister lhs,
                                     LiftoffRegister rhs) {
  liftoff::EmitSimdCommutativeBinOp<&Assembler::vpxor, &Assembler::pxor>(
      this, dst, lhs, rhs);
}

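// v128.select computes (src1 & mask) | (src2 & ~mask) bitwise. The
// destructive SSE sequence wants dst == mask, so without AVX the mask is
// first copied into dst; the DCHECKs below guarantee that this copy cannot
// clobber src1 or src2.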
void LiftoffAssembler::emit_s128_select(LiftoffRegister dst,
                                        LiftoffRegister src1,
                                        LiftoffRegister src2,
                                        LiftoffRegister mask) {
  // Ensure that we don't overwrite any inputs with the movaps below.
  DCHECK_NE(dst, src1);
  DCHECK_NE(dst, src2);
  if (!CpuFeatures::IsSupported(AVX) && dst != mask) {
    movaps(dst.fp(), mask.fp());
    S128Select(dst.fp(), dst.fp(), src1.fp(), src2.fp(), kScratchDoubleReg);
  } else {
    S128Select(dst.fp(), mask.fp(), src1.fp(), src2.fp(), kScratchDoubleReg);
  }
}

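// Negation is 0 - src, but when dst aliases src, zeroing dst first would wipe
// the input. psignb(x, y) negates each lane of x whose corresponding lane in
// y is negative, so psignb against an all-ones vector (-1 in every lane)
// negates dst in place. The same trick is used for the 16- and 32-bit lane
// negations below.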
void LiftoffAssembler::emit_i8x16_neg(LiftoffRegister dst,
                                      LiftoffRegister src) {
  if (dst.fp() == src.fp()) {
    Pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
    Psignb(dst.fp(), kScratchDoubleReg);
  } else {
    Pxor(dst.fp(), dst.fp());
    Psubb(dst.fp(), src.fp());
  }
}

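// EmitAnyTrue/EmitAllTrue (defined earlier in this file) reduce the vector to
// a 0/1 value in dst.gp(). The Pcmpeq template parameter of EmitAllTrue
// selects the lane width, so "all lanes non-zero" can be tested per element
// type; the i64x2 variant further below passes SSE4_1 because pcmpeqq
// requires it.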
void LiftoffAssembler::emit_v128_anytrue(LiftoffRegister dst,
                                         LiftoffRegister src) {
  liftoff::EmitAnyTrue(this, dst, src);
}

void LiftoffAssembler::emit_i8x16_alltrue(LiftoffRegister dst,
                                          LiftoffRegister src) {
  liftoff::EmitAllTrue<&TurboAssembler::Pcmpeqb>(this, dst, src);
}

void LiftoffAssembler::emit_i8x16_bitmask(LiftoffRegister dst,
                                          LiftoffRegister src) {
  Pmovmskb(dst.gp(), src.fp());
}

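// x64 has no packed 8-bit shift instructions, so the I8x16Shl/I8x16ShrS/
// I8x16ShrU macros synthesize them from 16-bit shifts plus masking; that is
// why they need a GP scratch and one or two extra XMM temporaries.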
void LiftoffAssembler::emit_i8x16_shl(LiftoffRegister dst, LiftoffRegister lhs,
                                      LiftoffRegister rhs) {
  I8x16Shl(dst.fp(), lhs.fp(), rhs.gp(), kScratchRegister, kScratchDoubleReg,
           liftoff::kScratchDoubleReg2);
}

void LiftoffAssembler::emit_i8x16_shli(LiftoffRegister dst, LiftoffRegister lhs,
                                       int32_t rhs) {
  I8x16Shl(dst.fp(), lhs.fp(), rhs, kScratchRegister, kScratchDoubleReg);
}

void LiftoffAssembler::emit_i8x16_shr_s(LiftoffRegister dst,
                                        LiftoffRegister lhs,
                                        LiftoffRegister rhs) {
  I8x16ShrS(dst.fp(), lhs.fp(), rhs.gp(), kScratchRegister, kScratchDoubleReg,
            liftoff::kScratchDoubleReg2);
}

void LiftoffAssembler::emit_i8x16_shri_s(LiftoffRegister dst,
                                         LiftoffRegister lhs, int32_t rhs) {
  I8x16ShrS(dst.fp(), lhs.fp(), rhs, kScratchDoubleReg);
}

void LiftoffAssembler::emit_i8x16_shr_u(LiftoffRegister dst,
                                        LiftoffRegister lhs,
                                        LiftoffRegister rhs) {
  I8x16ShrU(dst.fp(), lhs.fp(), rhs.gp(), kScratchRegister, kScratchDoubleReg,
            liftoff::kScratchDoubleReg2);
}

void LiftoffAssembler::emit_i8x16_shri_u(LiftoffRegister dst,
                                         LiftoffRegister lhs, int32_t rhs) {
  I8x16ShrU(dst.fp(), lhs.fp(), rhs, kScratchRegister, kScratchDoubleReg);
}

void LiftoffAssembler::emit_i8x16_add(LiftoffRegister dst, LiftoffRegister lhs,
                                      LiftoffRegister rhs) {
  liftoff::EmitSimdCommutativeBinOp<&Assembler::vpaddb, &Assembler::paddb>(
      this, dst, lhs, rhs);
}

void LiftoffAssembler::emit_i8x16_add_sat_s(LiftoffRegister dst,
                                            LiftoffRegister lhs,
                                            LiftoffRegister rhs) {
  liftoff::EmitSimdCommutativeBinOp<&Assembler::vpaddsb, &Assembler::paddsb>(
      this, dst, lhs, rhs);
}

void LiftoffAssembler::emit_i8x16_add_sat_u(LiftoffRegister dst,
                                            LiftoffRegister lhs,
                                            LiftoffRegister rhs) {
  liftoff::EmitSimdCommutativeBinOp<&Assembler::vpaddusb, &Assembler::paddusb>(
      this, dst, lhs, rhs);
}

void LiftoffAssembler::emit_i8x16_sub(LiftoffRegister dst, LiftoffRegister lhs,
                                      LiftoffRegister rhs) {
  liftoff::EmitSimdNonCommutativeBinOp<&Assembler::vpsubb, &Assembler::psubb>(
      this, dst, lhs, rhs);
}

void LiftoffAssembler::emit_i8x16_sub_sat_s(LiftoffRegister dst,
                                            LiftoffRegister lhs,
                                            LiftoffRegister rhs) {
  liftoff::EmitSimdNonCommutativeBinOp<&Assembler::vpsubsb, &Assembler::psubsb>(
      this, dst, lhs, rhs);
}

void LiftoffAssembler::emit_i8x16_sub_sat_u(LiftoffRegister dst,
                                            LiftoffRegister lhs,
                                            LiftoffRegister rhs) {
  liftoff::EmitSimdNonCommutativeBinOp<&Assembler::vpsubusb,
                                       &Assembler::psubusb>(this, dst, lhs,
                                                            rhs);
}

void LiftoffAssembler::emit_i8x16_min_s(LiftoffRegister dst,
                                        LiftoffRegister lhs,
                                        LiftoffRegister rhs) {
  liftoff::EmitSimdCommutativeBinOp<&Assembler::vpminsb, &Assembler::pminsb>(
      this, dst, lhs, rhs, base::Optional<CpuFeature>(SSE4_1));
}

void LiftoffAssembler::emit_i8x16_min_u(LiftoffRegister dst,
                                        LiftoffRegister lhs,
                                        LiftoffRegister rhs) {
  liftoff::EmitSimdCommutativeBinOp<&Assembler::vpminub, &Assembler::pminub>(
      this, dst, lhs, rhs);
}

void LiftoffAssembler::emit_i8x16_max_s(LiftoffRegister dst,
                                        LiftoffRegister lhs,
                                        LiftoffRegister rhs) {
  liftoff::EmitSimdCommutativeBinOp<&Assembler::vpmaxsb, &Assembler::pmaxsb>(
      this, dst, lhs, rhs, base::Optional<CpuFeature>(SSE4_1));
}

void LiftoffAssembler::emit_i8x16_max_u(LiftoffRegister dst,
                                        LiftoffRegister lhs,
                                        LiftoffRegister rhs) {
  liftoff::EmitSimdCommutativeBinOp<&Assembler::vpmaxub, &Assembler::pmaxub>(
      this, dst, lhs, rhs);
}

void LiftoffAssembler::emit_i16x8_neg(LiftoffRegister dst,
                                      LiftoffRegister src) {
  if (dst.fp() == src.fp()) {
    Pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
    Psignw(dst.fp(), kScratchDoubleReg);
  } else {
    Pxor(dst.fp(), dst.fp());
    Psubw(dst.fp(), src.fp());
  }
}

void LiftoffAssembler::emit_i16x8_alltrue(LiftoffRegister dst,
                                          LiftoffRegister src) {
  liftoff::EmitAllTrue<&TurboAssembler::Pcmpeqw>(this, dst, src);
}

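// There is no 16-bit counterpart of pmovmskb. packsswb packs the (ignored)
// words of the scratch register into the low 8 bytes and src's words into the
// high 8 bytes, with signed saturation preserving each lane's sign bit.
// pmovmskb then yields 16 sign bits, of which the upper 8 form the wanted
// mask; the shift discards the rest.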
void LiftoffAssembler::emit_i16x8_bitmask(LiftoffRegister dst,
                                          LiftoffRegister src) {
  XMMRegister tmp = kScratchDoubleReg;
  Packsswb(tmp, src.fp());
  Pmovmskb(dst.gp(), tmp);
  shrq(dst.gp(), Immediate(8));
}

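// The trailing template argument of EmitSimdShiftOp/EmitSimdShiftOpImm (4 for
// 16-bit lanes, 5 for 32-bit, 6 for 64-bit) is the number of significant
// shift bits: wasm defines shift counts modulo the lane width, so the helper
// masks rhs with (1 << width) - 1.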
void LiftoffAssembler::emit_i16x8_shl(LiftoffRegister dst, LiftoffRegister lhs,
                                      LiftoffRegister rhs) {
  liftoff::EmitSimdShiftOp<&Assembler::vpsllw, &Assembler::psllw, 4>(this, dst,
                                                                     lhs, rhs);
}

void LiftoffAssembler::emit_i16x8_shli(LiftoffRegister dst, LiftoffRegister lhs,
                                       int32_t rhs) {
  liftoff::EmitSimdShiftOpImm<&Assembler::vpsllw, &Assembler::psllw, 4>(
      this, dst, lhs, rhs);
}

void LiftoffAssembler::emit_i16x8_shr_s(LiftoffRegister dst,
                                        LiftoffRegister lhs,
                                        LiftoffRegister rhs) {
  liftoff::EmitSimdShiftOp<&Assembler::vpsraw, &Assembler::psraw, 4>(this, dst,
                                                                     lhs, rhs);
}

void LiftoffAssembler::emit_i16x8_shri_s(LiftoffRegister dst,
                                         LiftoffRegister lhs, int32_t rhs) {
  liftoff::EmitSimdShiftOpImm<&Assembler::vpsraw, &Assembler::psraw, 4>(
      this, dst, lhs, rhs);
}

void LiftoffAssembler::emit_i16x8_shr_u(LiftoffRegister dst,
                                        LiftoffRegister lhs,
                                        LiftoffRegister rhs) {
  liftoff::EmitSimdShiftOp<&Assembler::vpsrlw, &Assembler::psrlw, 4>(this, dst,
                                                                     lhs, rhs);
}

void LiftoffAssembler::emit_i16x8_shri_u(LiftoffRegister dst,
                                         LiftoffRegister lhs, int32_t rhs) {
  liftoff::EmitSimdShiftOpImm<&Assembler::vpsrlw, &Assembler::psrlw, 4>(
      this, dst, lhs, rhs);
}

void LiftoffAssembler::emit_i16x8_add(LiftoffRegister dst, LiftoffRegister lhs,
                                      LiftoffRegister rhs) {
  liftoff::EmitSimdCommutativeBinOp<&Assembler::vpaddw, &Assembler::paddw>(
      this, dst, lhs, rhs);
}

void LiftoffAssembler::emit_i16x8_add_sat_s(LiftoffRegister dst,
                                            LiftoffRegister lhs,
                                            LiftoffRegister rhs) {
  liftoff::EmitSimdCommutativeBinOp<&Assembler::vpaddsw, &Assembler::paddsw>(
      this, dst, lhs, rhs);
}

void LiftoffAssembler::emit_i16x8_add_sat_u(LiftoffRegister dst,
                                            LiftoffRegister lhs,
                                            LiftoffRegister rhs) {
  liftoff::EmitSimdCommutativeBinOp<&Assembler::vpaddusw, &Assembler::paddusw>(
      this, dst, lhs, rhs);
}

void LiftoffAssembler::emit_i16x8_sub(LiftoffRegister dst, LiftoffRegister lhs,
                                      LiftoffRegister rhs) {
  liftoff::EmitSimdNonCommutativeBinOp<&Assembler::vpsubw, &Assembler::psubw>(
      this, dst, lhs, rhs);
}

void LiftoffAssembler::emit_i16x8_sub_sat_s(LiftoffRegister dst,
                                            LiftoffRegister lhs,
                                            LiftoffRegister rhs) {
  liftoff::EmitSimdNonCommutativeBinOp<&Assembler::vpsubsw, &Assembler::psubsw>(
      this, dst, lhs, rhs);
}

void LiftoffAssembler::emit_i16x8_sub_sat_u(LiftoffRegister dst,
                                            LiftoffRegister lhs,
                                            LiftoffRegister rhs) {
  liftoff::EmitSimdNonCommutativeBinOp<&Assembler::vpsubusw,
                                       &Assembler::psubusw>(this, dst, lhs,
                                                            rhs);
}

void LiftoffAssembler::emit_i16x8_mul(LiftoffRegister dst, LiftoffRegister lhs,
                                      LiftoffRegister rhs) {
  liftoff::EmitSimdCommutativeBinOp<&Assembler::vpmullw, &Assembler::pmullw>(
      this, dst, lhs, rhs);
}

void LiftoffAssembler::emit_i16x8_min_s(LiftoffRegister dst,
                                        LiftoffRegister lhs,
                                        LiftoffRegister rhs) {
  liftoff::EmitSimdCommutativeBinOp<&Assembler::vpminsw, &Assembler::pminsw>(
      this, dst, lhs, rhs);
}

void LiftoffAssembler::emit_i16x8_min_u(LiftoffRegister dst,
                                        LiftoffRegister lhs,
                                        LiftoffRegister rhs) {
  liftoff::EmitSimdCommutativeBinOp<&Assembler::vpminuw, &Assembler::pminuw>(
      this, dst, lhs, rhs, base::Optional<CpuFeature>(SSE4_1));
}

void LiftoffAssembler::emit_i16x8_max_s(LiftoffRegister dst,
                                        LiftoffRegister lhs,
                                        LiftoffRegister rhs) {
  liftoff::EmitSimdCommutativeBinOp<&Assembler::vpmaxsw, &Assembler::pmaxsw>(
      this, dst, lhs, rhs);
}

void LiftoffAssembler::emit_i16x8_max_u(LiftoffRegister dst,
                                        LiftoffRegister lhs,
                                        LiftoffRegister rhs) {
  liftoff::EmitSimdCommutativeBinOp<&Assembler::vpmaxuw, &Assembler::pmaxuw>(
      this, dst, lhs, rhs, base::Optional<CpuFeature>(SSE4_1));
}

void LiftoffAssembler::emit_i16x8_extadd_pairwise_i8x16_s(LiftoffRegister dst,
                                                          LiftoffRegister src) {
  I16x8ExtAddPairwiseI8x16S(dst.fp(), src.fp(), kScratchDoubleReg,
                            kScratchRegister);
}

void LiftoffAssembler::emit_i16x8_extadd_pairwise_i8x16_u(LiftoffRegister dst,
                                                          LiftoffRegister src) {
  I16x8ExtAddPairwiseI8x16U(dst.fp(), src.fp(), kScratchRegister);
}

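// extmul_{low,high}: sign- or zero-extend the low (or high) half of each
// operand to twice the lane width, then multiply. The is_signed flag selects
// the extension; the high variants additionally use the scratch register to
// move the upper lanes into place.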
void LiftoffAssembler::emit_i16x8_extmul_low_i8x16_s(LiftoffRegister dst,
                                                     LiftoffRegister src1,
                                                     LiftoffRegister src2) {
  I16x8ExtMulLow(dst.fp(), src1.fp(), src2.fp(), kScratchDoubleReg,
                 /*is_signed=*/true);
}

void LiftoffAssembler::emit_i16x8_extmul_low_i8x16_u(LiftoffRegister dst,
                                                     LiftoffRegister src1,
                                                     LiftoffRegister src2) {
  I16x8ExtMulLow(dst.fp(), src1.fp(), src2.fp(), kScratchDoubleReg,
                 /*is_signed=*/false);
}

void LiftoffAssembler::emit_i16x8_extmul_high_i8x16_s(LiftoffRegister dst,
                                                      LiftoffRegister src1,
                                                      LiftoffRegister src2) {
  I16x8ExtMulHighS(dst.fp(), src1.fp(), src2.fp(), kScratchDoubleReg);
}

void LiftoffAssembler::emit_i16x8_extmul_high_i8x16_u(LiftoffRegister dst,
                                                      LiftoffRegister src1,
                                                      LiftoffRegister src2) {
  I16x8ExtMulHighU(dst.fp(), src1.fp(), src2.fp(), kScratchDoubleReg);
}

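// i16x8.q15mulr_sat_s is the rounding Q15 multiply ((a * b + 0x4000) >> 15),
// which is what pmulhrsw computes, except that pmulhrsw wraps to -0x8000 for
// -0x8000 * -0x8000 where wasm requires saturation to 0x7FFF; the macro uses
// the scratch register to patch up that one case.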
void LiftoffAssembler::emit_i16x8_q15mulr_sat_s(LiftoffRegister dst,
                                                LiftoffRegister src1,
                                                LiftoffRegister src2) {
  I16x8Q15MulRSatS(dst.fp(), src1.fp(), src2.fp(), kScratchDoubleReg);
}

void LiftoffAssembler::emit_i32x4_neg(LiftoffRegister dst,
                                      LiftoffRegister src) {
  if (dst.fp() == src.fp()) {
    Pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
    Psignd(dst.fp(), kScratchDoubleReg);
  } else {
    Pxor(dst.fp(), dst.fp());
    Psubd(dst.fp(), src.fp());
  }
}

void LiftoffAssembler::emit_i32x4_alltrue(LiftoffRegister dst,
                                          LiftoffRegister src) {
  liftoff::EmitAllTrue<&TurboAssembler::Pcmpeqd>(this, dst, src);
}

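// For 32- and 64-bit lanes the float sign-mask instructions double as integer
// bitmask: movmskps/movmskpd extract exactly one sign bit per lane of the
// matching width.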
void LiftoffAssembler::emit_i32x4_bitmask(LiftoffRegister dst,
                                          LiftoffRegister src) {
  Movmskps(dst.gp(), src.fp());
}

void LiftoffAssembler::emit_i32x4_shl(LiftoffRegister dst, LiftoffRegister lhs,
                                      LiftoffRegister rhs) {
  liftoff::EmitSimdShiftOp<&Assembler::vpslld, &Assembler::pslld, 5>(this, dst,
                                                                     lhs, rhs);
}

void LiftoffAssembler::emit_i32x4_shli(LiftoffRegister dst, LiftoffRegister lhs,
                                       int32_t rhs) {
  liftoff::EmitSimdShiftOpImm<&Assembler::vpslld, &Assembler::pslld, 5>(
      this, dst, lhs, rhs);
}

void LiftoffAssembler::emit_i32x4_shr_s(LiftoffRegister dst,
                                        LiftoffRegister lhs,
                                        LiftoffRegister rhs) {
  liftoff::EmitSimdShiftOp<&Assembler::vpsrad, &Assembler::psrad, 5>(this, dst,
                                                                     lhs, rhs);
}

void LiftoffAssembler::emit_i32x4_shri_s(LiftoffRegister dst,
                                         LiftoffRegister lhs, int32_t rhs) {
  liftoff::EmitSimdShiftOpImm<&Assembler::vpsrad, &Assembler::psrad, 5>(
      this, dst, lhs, rhs);
}

void LiftoffAssembler::emit_i32x4_shr_u(LiftoffRegister dst,
                                        LiftoffRegister lhs,
                                        LiftoffRegister rhs) {
  liftoff::EmitSimdShiftOp<&Assembler::vpsrld, &Assembler::psrld, 5>(this, dst,
                                                                     lhs, rhs);
}

void LiftoffAssembler::emit_i32x4_shri_u(LiftoffRegister dst,
                                         LiftoffRegister lhs, int32_t rhs) {
  liftoff::EmitSimdShiftOpImm<&Assembler::vpsrld, &Assembler::psrld, 5>(
      this, dst, lhs, rhs);
}

void LiftoffAssembler::emit_i32x4_add(LiftoffRegister dst, LiftoffRegister lhs,
                                      LiftoffRegister rhs) {
  liftoff::EmitSimdCommutativeBinOp<&Assembler::vpaddd, &Assembler::paddd>(
      this, dst, lhs, rhs);
}

void LiftoffAssembler::emit_i32x4_sub(LiftoffRegister dst, LiftoffRegister lhs,
                                      LiftoffRegister rhs) {
  liftoff::EmitSimdNonCommutativeBinOp<&Assembler::vpsubd, &Assembler::psubd>(
      this, dst, lhs, rhs);
}

void LiftoffAssembler::emit_i32x4_mul(LiftoffRegister dst, LiftoffRegister lhs,
                                      LiftoffRegister rhs) {
  liftoff::EmitSimdCommutativeBinOp<&Assembler::vpmulld, &Assembler::pmulld>(
      this, dst, lhs, rhs, base::Optional<CpuFeature>(SSE4_1));
}

void LiftoffAssembler::emit_i32x4_min_s(LiftoffRegister dst,
                                        LiftoffRegister lhs,
                                        LiftoffRegister rhs) {
  liftoff::EmitSimdCommutativeBinOp<&Assembler::vpminsd, &Assembler::pminsd>(
      this, dst, lhs, rhs, base::Optional<CpuFeature>(SSE4_1));
}

void LiftoffAssembler::emit_i32x4_min_u(LiftoffRegister dst,
                                        LiftoffRegister lhs,
                                        LiftoffRegister rhs) {
  liftoff::EmitSimdCommutativeBinOp<&Assembler::vpminud, &Assembler::pminud>(
      this, dst, lhs, rhs, base::Optional<CpuFeature>(SSE4_1));
}

void LiftoffAssembler::emit_i32x4_max_s(LiftoffRegister dst,
                                        LiftoffRegister lhs,
                                        LiftoffRegister rhs) {
  liftoff::EmitSimdCommutativeBinOp<&Assembler::vpmaxsd, &Assembler::pmaxsd>(
      this, dst, lhs, rhs, base::Optional<CpuFeature>(SSE4_1));
}

void LiftoffAssembler::emit_i32x4_max_u(LiftoffRegister dst,
                                        LiftoffRegister lhs,
                                        LiftoffRegister rhs) {
  liftoff::EmitSimdCommutativeBinOp<&Assembler::vpmaxud, &Assembler::pmaxud>(
      this, dst, lhs, rhs, base::Optional<CpuFeature>(SSE4_1));
}

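// i32x4.dot_i16x8_s maps 1:1 onto pmaddwd, which multiplies adjacent pairs of
// signed 16-bit lanes and sums each pair into a 32-bit lane. The signed
// extadd_pairwise below is presumably built on the same instruction with a
// constant vector of ones.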
void LiftoffAssembler::emit_i32x4_dot_i16x8_s(LiftoffRegister dst,
                                              LiftoffRegister lhs,
                                              LiftoffRegister rhs) {
  liftoff::EmitSimdCommutativeBinOp<&Assembler::vpmaddwd, &Assembler::pmaddwd>(
      this, dst, lhs, rhs);
}

void LiftoffAssembler::emit_i32x4_extadd_pairwise_i16x8_s(LiftoffRegister dst,
                                                          LiftoffRegister src) {
  I32x4ExtAddPairwiseI16x8S(dst.fp(), src.fp(), kScratchRegister);
}

void LiftoffAssembler::emit_i32x4_extadd_pairwise_i16x8_u(LiftoffRegister dst,
                                                          LiftoffRegister src) {
  I32x4ExtAddPairwiseI16x8U(dst.fp(), src.fp(), kScratchDoubleReg);
}

namespace liftoff {
// Helper that checks for register aliasing and AVX support, and moves
// registers around before calling the actual macro-assembler function.
inline void I32x4ExtMulHelper(LiftoffAssembler* assm, XMMRegister dst,
                              XMMRegister src1, XMMRegister src2, bool low,
                              bool is_signed) {
  // I32x4ExtMul requires dst == src1 if AVX is not supported.
  if (CpuFeatures::IsSupported(AVX) || dst == src1) {
    assm->I32x4ExtMul(dst, src1, src2, kScratchDoubleReg, low, is_signed);
  } else if (dst != src2) {
    // dst != src1 && dst != src2
    assm->movaps(dst, src1);
    assm->I32x4ExtMul(dst, dst, src2, kScratchDoubleReg, low, is_signed);
  } else {
    // dst == src2. Extended multiplication is commutative, so the operands
    // can be swapped.
    assm->movaps(dst, src2);
    assm->I32x4ExtMul(dst, dst, src1, kScratchDoubleReg, low, is_signed);
  }
}
}  // namespace liftoff

void LiftoffAssembler::emit_i32x4_extmul_low_i16x8_s(LiftoffRegister dst,
                                                     LiftoffRegister src1,
                                                     LiftoffRegister src2) {
  liftoff::I32x4ExtMulHelper(this, dst.fp(), src1.fp(), src2.fp(), /*low=*/true,
                             /*is_signed=*/true);
}

void LiftoffAssembler::emit_i32x4_extmul_low_i16x8_u(LiftoffRegister dst,
                                                     LiftoffRegister src1,
                                                     LiftoffRegister src2) {
  liftoff::I32x4ExtMulHelper(this, dst.fp(), src1.fp(), src2.fp(), /*low=*/true,
                             /*is_signed=*/false);
}

void LiftoffAssembler::emit_i32x4_extmul_high_i16x8_s(LiftoffRegister dst,
                                                      LiftoffRegister src1,
                                                      LiftoffRegister src2) {
  liftoff::I32x4ExtMulHelper(this, dst.fp(), src1.fp(), src2.fp(),
                             /*low=*/false,
                             /*is_signed=*/true);
}

void LiftoffAssembler::emit_i32x4_extmul_high_i16x8_u(LiftoffRegister dst,
                                                      LiftoffRegister src1,
                                                      LiftoffRegister src2) {
  liftoff::I32x4ExtMulHelper(this, dst.fp(), src1.fp(), src2.fp(),
                             /*low=*/false,
                             /*is_signed=*/false);
}

void LiftoffAssembler::emit_i64x2_neg(LiftoffRegister dst,
                                      LiftoffRegister src) {
  I64x2Neg(dst.fp(), src.fp(), kScratchDoubleReg);
}

void LiftoffAssembler::emit_i64x2_alltrue(LiftoffRegister dst,
                                          LiftoffRegister src) {
  liftoff::EmitAllTrue<&TurboAssembler::Pcmpeqq>(this, dst, src, SSE4_1);
}

void LiftoffAssembler::emit_i64x2_shl(LiftoffRegister dst, LiftoffRegister lhs,
                                      LiftoffRegister rhs) {
  liftoff::EmitSimdShiftOp<&Assembler::vpsllq, &Assembler::psllq, 6>(this, dst,
                                                                     lhs, rhs);
}

void LiftoffAssembler::emit_i64x2_shli(LiftoffRegister dst, LiftoffRegister lhs,
                                       int32_t rhs) {
  liftoff::EmitSimdShiftOpImm<&Assembler::vpsllq, &Assembler::psllq, 6>(
      this, dst, lhs, rhs);
}

void LiftoffAssembler::emit_i64x2_shr_s(LiftoffRegister dst,
                                        LiftoffRegister lhs,
                                        LiftoffRegister rhs) {
  I64x2ShrS(dst.fp(), lhs.fp(), rhs.gp(), kScratchDoubleReg,
            liftoff::kScratchDoubleReg2, kScratchRegister);
}

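// There is no packed 64-bit arithmetic right shift before AVX-512, hence the
// I64x2ShrS macro and its extra temporaries above. For the immediate variant
// the count is masked to 0-63 up front (wasm defines shift counts modulo 64),
// so only the scratch XMM register is needed.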
void LiftoffAssembler::emit_i64x2_shri_s(LiftoffRegister dst,
                                         LiftoffRegister lhs, int32_t rhs) {
  I64x2ShrS(dst.fp(), lhs.fp(), rhs & 0x3F, kScratchDoubleReg);
}

void LiftoffAssembler::emit_i64x2_shr_u(LiftoffRegister dst,
                                        LiftoffRegister lhs,
                                        LiftoffRegister rhs) {
  liftoff::EmitSimdShiftOp<&Assembler::vpsrlq, &Assembler::psrlq, 6>(this, dst,
                                                                     lhs, rhs);
}

void LiftoffAssembler::emit_i64x2_shri_u(LiftoffRegister dst,
                                         LiftoffRegister lhs, int32_t rhs) {
  liftoff::EmitSimdShiftOpImm<&Assembler::vpsrlq, &Assembler::psrlq, 6>(
      this, dst, lhs, rhs);
}

void LiftoffAssembler::emit_i64x2_add(LiftoffRegister dst, LiftoffRegister lhs,
                                      LiftoffRegister rhs) {
  liftoff::EmitSimdCommutativeBinOp<&Assembler::vpaddq, &Assembler::paddq>(
      this, dst, lhs, rhs);
}

void LiftoffAssembler::emit_i64x2_sub(LiftoffRegister dst, LiftoffRegister lhs,
                                      LiftoffRegister rhs) {
  liftoff::EmitSimdNonCommutativeBinOp<&Assembler::vpsubq, &Assembler::psubq>(
      this, dst, lhs, rhs);
}

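// SSE/AVX2 have no packed 64x64->64 multiply (vpmullq is AVX-512 only), so
// I64x2Mul assembles one from 32-bit pmuludq partial products. That needs two
// fresh XMM temporaries; GetUnusedRegister keeps them disjoint from the
// operands.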
void LiftoffAssembler::emit_i64x2_mul(LiftoffRegister dst, LiftoffRegister lhs,
                                      LiftoffRegister rhs) {
  static constexpr RegClass tmp_rc = reg_class_for(kS128);
  LiftoffRegister tmp1 =
      GetUnusedRegister(tmp_rc, LiftoffRegList::ForRegs(dst, lhs, rhs));
  LiftoffRegister tmp2 =
      GetUnusedRegister(tmp_rc, LiftoffRegList::ForRegs(dst, lhs, rhs, tmp1));
  I64x2Mul(dst.fp(), lhs.fp(), rhs.fp(), tmp1.fp(), tmp2.fp());
}

void LiftoffAssembler::emit_i64x2_extmul_low_i32x4_s(LiftoffRegister dst,
                                                     LiftoffRegister src1,
                                                     LiftoffRegister src2) {
  I64x2ExtMul(dst.fp(), src1.fp(), src2.fp(), kScratchDoubleReg, /*low=*/true,
              /*is_signed=*/true);
}

void LiftoffAssembler::emit_i64x2_extmul_low_i32x4_u(LiftoffRegister dst,
                                                     LiftoffRegister src1,
                                                     LiftoffRegister src2) {
  I64x2ExtMul(dst.fp(), src1.fp(), src2.fp(), kScratchDoubleReg, /*low=*/true,
              /*is_signed=*/false);
}

void LiftoffAssembler::emit_i64x2_extmul_high_i32x4_s(LiftoffRegister dst,
                                                      LiftoffRegister src1,
                                                      LiftoffRegister src2) {
  I64x2ExtMul(dst.fp(), src1.fp(), src2.fp(), kScratchDoubleReg, /*low=*/false,
              /*is_signed=*/true);
}

void LiftoffAssembler::emit_i64x2_extmul_high_i32x4_u(LiftoffRegister dst,
                                                      LiftoffRegister src1,
                                                      LiftoffRegister src2) {
  I64x2ExtMul(dst.fp(), src1.fp(), src2.fp(), kScratchDoubleReg, /*low=*/false,
              /*is_signed=*/false);
}

void LiftoffAssembler::emit_i64x2_bitmask(LiftoffRegister dst,
                                          LiftoffRegister src) {
  Movmskpd(dst.gp(), src.fp());
}

void LiftoffAssembler::emit_i64x2_sconvert_i32x4_low(LiftoffRegister dst,
                                                     LiftoffRegister src) {
  Pmovsxdq(dst.fp(), src.fp());
}

void LiftoffAssembler::emit_i64x2_sconvert_i32x4_high(LiftoffRegister dst,
                                                      LiftoffRegister src) {
  I64x2SConvertI32x4High(dst.fp(), src.fp());
}

void LiftoffAssembler::emit_i64x2_uconvert_i32x4_low(LiftoffRegister dst,
                                                     LiftoffRegister src) {
  Pmovzxdq(dst.fp(), src.fp());
}

void LiftoffAssembler::emit_i64x2_uconvert_i32x4_high(LiftoffRegister dst,
                                                      LiftoffRegister src) {
  I64x2UConvertI32x4High(dst.fp(), src.fp(), kScratchDoubleReg);
}

void LiftoffAssembler::emit_f32x4_abs(LiftoffRegister dst,
                                      LiftoffRegister src) {
  Absps(dst.fp(), src.fp(), kScratchRegister);
}

void LiftoffAssembler::emit_f32x4_neg(LiftoffRegister dst,
                                      LiftoffRegister src) {
  Negps(dst.fp(), src.fp(), kScratchRegister);
}

void LiftoffAssembler::emit_f32x4_sqrt(LiftoffRegister dst,
                                       LiftoffRegister src) {
  Sqrtps(dst.fp(), src.fp());
}

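// roundps/roundpd are SSE4.1 instructions. These emitters return a bool so
// that configurations without the instruction can report failure and let the
// caller emit a fallback; here SSE4_1 support has already been established at
// the call site, hence the DCHECK and the unconditional `return true`.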
bool LiftoffAssembler::emit_f32x4_ceil(LiftoffRegister dst,
                                       LiftoffRegister src) {
  DCHECK(CpuFeatures::IsSupported(SSE4_1));
  Roundps(dst.fp(), src.fp(), kRoundUp);
  return true;
}

bool LiftoffAssembler::emit_f32x4_floor(LiftoffRegister dst,
                                        LiftoffRegister src) {
  DCHECK(CpuFeatures::IsSupported(SSE4_1));
  Roundps(dst.fp(), src.fp(), kRoundDown);
  return true;
}

bool LiftoffAssembler::emit_f32x4_trunc(LiftoffRegister dst,
                                        LiftoffRegister src) {
  DCHECK(CpuFeatures::IsSupported(SSE4_1));
  Roundps(dst.fp(), src.fp(), kRoundToZero);
  return true;
}

bool LiftoffAssembler::emit_f32x4_nearest_int(LiftoffRegister dst,
                                              LiftoffRegister src) {
  DCHECK(CpuFeatures::IsSupported(SSE4_1));
  Roundps(dst.fp(), src.fp(), kRoundToNearest);
  return true;
}

void LiftoffAssembler::emit_f32x4_add(LiftoffRegister dst, LiftoffRegister lhs,
                                      LiftoffRegister rhs) {
  liftoff::EmitSimdCommutativeBinOp<&Assembler::vaddps, &Assembler::addps>(
      this, dst, lhs, rhs);
}

void LiftoffAssembler::emit_f32x4_sub(LiftoffRegister dst, LiftoffRegister lhs,
                                      LiftoffRegister rhs) {
  liftoff::EmitSimdNonCommutativeBinOp<&Assembler::vsubps, &Assembler::subps>(
      this, dst, lhs, rhs);
}

void LiftoffAssembler::emit_f32x4_mul(LiftoffRegister dst, LiftoffRegister lhs,
                                      LiftoffRegister rhs) {
  liftoff::EmitSimdCommutativeBinOp<&Assembler::vmulps, &Assembler::mulps>(
      this, dst, lhs, rhs);
}

void LiftoffAssembler::emit_f32x4_div(LiftoffRegister dst, LiftoffRegister lhs,
                                      LiftoffRegister rhs) {
  liftoff::EmitSimdNonCommutativeBinOp<&Assembler::vdivps, &Assembler::divps>(
      this, dst, lhs, rhs);
}

void LiftoffAssembler::emit_f32x4_min(LiftoffRegister dst, LiftoffRegister lhs,
                                      LiftoffRegister rhs) {
  F32x4Min(dst.fp(), lhs.fp(), rhs.fp(), kScratchDoubleReg);
}

void LiftoffAssembler::emit_f32x4_max(LiftoffRegister dst, LiftoffRegister lhs,
                                      LiftoffRegister rhs) {
  F32x4Max(dst.fp(), lhs.fp(), rhs.fp(), kScratchDoubleReg);
}

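// Wasm pmin(a, b) is defined as b < a ? b : a (and pmax as a < b ? b : a),
// i.e. the first operand wins on NaN or equal inputs. minps/maxps return
// their second source operand in exactly those cases, so swapping the
// operands yields the required semantics with a single instruction.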
void LiftoffAssembler::emit_f32x4_pmin(LiftoffRegister dst, LiftoffRegister lhs,
                                       LiftoffRegister rhs) {
  // Due to the way minps works, pmin(a, b) = minps(b, a).
  liftoff::EmitSimdNonCommutativeBinOp<&Assembler::vminps, &Assembler::minps>(
      this, dst, rhs, lhs);
}

void LiftoffAssembler::emit_f32x4_pmax(LiftoffRegister dst, LiftoffRegister lhs,
                                       LiftoffRegister rhs) {
  // Due to the way maxps works, pmax(a, b) = maxps(b, a).
  liftoff::EmitSimdNonCommutativeBinOp<&Assembler::vmaxps, &Assembler::maxps>(
      this, dst, rhs, lhs);
}

void LiftoffAssembler::emit_f64x2_abs(LiftoffRegister dst,
                                      LiftoffRegister src) {
  Abspd(dst.fp(), src.fp(), kScratchRegister);
}

void LiftoffAssembler::emit_f64x2_neg(LiftoffRegister dst,
                                      LiftoffRegister src) {
  Negpd(dst.fp(), src.fp(), kScratchRegister);
}

void LiftoffAssembler::emit_f64x2_sqrt(LiftoffRegister dst,
                                       LiftoffRegister src) {
  Sqrtpd(dst.fp(), src.fp());
}

bool LiftoffAssembler::emit_f64x2_ceil(LiftoffRegister dst,
                                       LiftoffRegister src) {
  DCHECK(CpuFeatures::IsSupported(SSE4_1));
  Roundpd(dst.fp(), src.fp(), kRoundUp);
  return true;
}

bool LiftoffAssembler::emit_f64x2_floor(LiftoffRegister dst,
                                        LiftoffRegister src) {
  DCHECK(CpuFeatures::IsSupported(SSE4_1));
  Roundpd(dst.fp(), src.fp(), kRoundDown);
  return true;
}

bool LiftoffAssembler::emit_f64x2_trunc(LiftoffRegister dst,
                                        LiftoffRegister src) {
  DCHECK(CpuFeatures::IsSupported(SSE4_1));
  Roundpd(dst.fp(), src.fp(), kRoundToZero);
  return true;
}

bool LiftoffAssembler::emit_f64x2_nearest_int(LiftoffRegister dst,
                                              LiftoffRegister src) {
  DCHECK(CpuFeatures::IsSupported(SSE4_1));
  Roundpd(dst.fp(), src.fp(), kRoundToNearest);
  return true;
}

void LiftoffAssembler::emit_f64x2_add(LiftoffRegister dst, LiftoffRegister lhs,
                                      LiftoffRegister rhs) {
  liftoff::EmitSimdCommutativeBinOp<&Assembler::vaddpd, &Assembler::addpd>(
      this, dst, lhs, rhs);
}

void LiftoffAssembler::emit_f64x2_sub(LiftoffRegister dst, LiftoffRegister lhs,
                                      LiftoffRegister rhs) {
  liftoff::EmitSimdNonCommutativeBinOp<&Assembler::vsubpd, &Assembler::subpd>(
      this, dst, lhs, rhs);
}

void LiftoffAssembler::emit_f64x2_mul(LiftoffRegister dst, LiftoffRegister lhs,
                                      LiftoffRegister rhs) {
  liftoff::EmitSimdCommutativeBinOp<&Assembler::vmulpd, &Assembler::mulpd>(
      this, dst, lhs, rhs);
}

void LiftoffAssembler::emit_f64x2_div(LiftoffRegister dst, LiftoffRegister lhs,
                                      LiftoffRegister rhs) {
  liftoff::EmitSimdNonCommutativeBinOp<&Assembler::vdivpd, &Assembler::divpd>(
      this, dst, lhs, rhs);
}

void LiftoffAssembler::emit_f64x2_min(LiftoffRegister dst, LiftoffRegister lhs,
                                      LiftoffRegister rhs) {
  F64x2Min(dst.fp(), lhs.fp(), rhs.fp(), kScratchDoubleReg);
}

void LiftoffAssembler::emit_f64x2_max(LiftoffRegister dst, LiftoffRegister lhs,
                                      LiftoffRegister rhs) {
  F64x2Max(dst.fp(), lhs.fp(), rhs.fp(), kScratchDoubleReg);
}

void LiftoffAssembler::emit_f64x2_pmin(LiftoffRegister dst, LiftoffRegister lhs,
                                       LiftoffRegister rhs) {
  // As for f32x4 above: minpd returns its second operand if either operand
  // is NaN or both are zero, so pmin(a, b) = minpd(b, a).
  liftoff::EmitSimdNonCommutativeBinOp<&Assembler::vminpd, &Assembler::minpd>(
      this, dst, rhs, lhs);
}

void LiftoffAssembler::emit_f64x2_pmax(LiftoffRegister dst, LiftoffRegister lhs,
                                       LiftoffRegister rhs) {
  // As for f32x4 above: maxpd returns its second operand if either operand
  // is NaN or both are zero, so pmax(a, b) = maxpd(b, a).
  liftoff::EmitSimdNonCommutativeBinOp<&Assembler::vmaxpd, &Assembler::maxpd>(
      this, dst, rhs, lhs);
}

void LiftoffAssembler::emit_f64x2_convert_low_i32x4_s(LiftoffRegister dst,
                                                      LiftoffRegister src) {
  Cvtdq2pd(dst.fp(), src.fp());
}

void LiftoffAssembler::emit_f64x2_convert_low_i32x4_u(LiftoffRegister dst,
                                                      LiftoffRegister src) {
  F64x2ConvertLowI32x4U(dst.fp(), src.fp(), kScratchRegister);
}

void LiftoffAssembler::emit_f64x2_promote_low_f32x4(LiftoffRegister dst,
                                                    LiftoffRegister src) {
  Cvtps2pd(dst.fp(), src.fp());
}

void LiftoffAssembler::emit_i32x4_sconvert_f32x4(LiftoffRegister dst,
                                                 LiftoffRegister src) {
  I32x4SConvertF32x4(dst.fp(), src.fp(), kScratchDoubleReg, kScratchRegister);
}

void LiftoffAssembler::emit_i32x4_uconvert_f32x4(LiftoffRegister dst,
                                                 LiftoffRegister src) {
  // NaN->0, negative->0.
  Pxor(kScratchDoubleReg, kScratchDoubleReg);
  if (CpuFeatures::IsSupported(AVX)) {
    CpuFeatureScope scope(this, AVX);
    vmaxps(dst.fp(), src.fp(), kScratchDoubleReg);
  } else {
    if (dst.fp() != src.fp()) movaps(dst.fp(), src.fp());
    maxps(dst.fp(), kScratchDoubleReg);
  }
  // scratch: float representation of max_signed.
  Pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
  Psrld(kScratchDoubleReg, uint8_t{1});            // 0x7fffffff
  Cvtdq2ps(kScratchDoubleReg, kScratchDoubleReg);  // 0x4f000000
  // scratch2: convert (src - max_signed).
  // Set positive overflow lanes to 0x7FFFFFFF.
  // Set negative lanes to 0.
  if (CpuFeatures::IsSupported(AVX)) {
    CpuFeatureScope scope(this, AVX);
    vsubps(liftoff::kScratchDoubleReg2, dst.fp(), kScratchDoubleReg);
  } else {
    movaps(liftoff::kScratchDoubleReg2, dst.fp());
    subps(liftoff::kScratchDoubleReg2, kScratchDoubleReg);
  }
  Cmpleps(kScratchDoubleReg, liftoff::kScratchDoubleReg2);
  Cvttps2dq(liftoff::kScratchDoubleReg2, liftoff::kScratchDoubleReg2);
  Pxor(liftoff::kScratchDoubleReg2, kScratchDoubleReg);
  Pxor(kScratchDoubleReg, kScratchDoubleReg);
  Pmaxsd(liftoff::kScratchDoubleReg2, kScratchDoubleReg);
  // Convert to int. Overflow lanes above max_signed will be 0x80000000.
  Cvttps2dq(dst.fp(), dst.fp());
  // Add (src - max_signed) for overflow lanes.
  Paddd(dst.fp(), liftoff::kScratchDoubleReg2);
}

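// Worked example for the unsigned saturating conversion above, for a lane
// holding 3e9 (greater than INT32_MAX but less than UINT32_MAX): the clamp
// leaves it unchanged, scratch2 becomes 3e9 - 2^31 = 852516352, the overflow
// mask is zero, cvttps2dq(3e9) yields the "integer indefinite" 0x80000000,
// and adding 852516352 produces 0xB2D05E00 = 3000000000 as expected.
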
void LiftoffAssembler::emit_f32x4_sconvert_i32x4(LiftoffRegister dst,
                                                 LiftoffRegister src) {
  Cvtdq2ps(dst.fp(), src.fp());
}

void LiftoffAssembler::emit_f32x4_uconvert_i32x4(LiftoffRegister dst,
                                                 LiftoffRegister src) {
  Pxor(kScratchDoubleReg, kScratchDoubleReg);           // Zeros.
  Pblendw(kScratchDoubleReg, src.fp(), uint8_t{0x55});  // Get lo 16 bits.
  if (CpuFeatures::IsSupported(AVX)) {
    CpuFeatureScope scope(this, AVX);
    vpsubd(dst.fp(), src.fp(), kScratchDoubleReg);  // Get hi 16 bits.
  } else {
    if (dst.fp() != src.fp()) movaps(dst.fp(), src.fp());
    psubd(dst.fp(), kScratchDoubleReg);
  }
  Cvtdq2ps(kScratchDoubleReg, kScratchDoubleReg);  // Convert lo exactly.
  Psrld(dst.fp(), uint8_t{1});         // Divide by 2 to get in unsigned range.
  Cvtdq2ps(dst.fp(), dst.fp());        // Convert hi, exactly.
  Addps(dst.fp(), dst.fp());           // Double hi, exactly.
  Addps(dst.fp(), kScratchDoubleReg);  // Add hi and lo, may round.
}

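// Worked example for the exact split above, for a lane holding 0x80000001:
// the low half 0x00000001 converts exactly; the high half 0x80000000 would
// be negative as a signed int, so it is halved to 0x40000000 (exact, since
// bit 0 of the high half is always zero), converted, and doubled back to
// 2^31. The final addition 2^31 + 1 then rounds to 2147483648.0f, the
// nearest representable float.
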
void LiftoffAssembler::emit_f32x4_demote_f64x2_zero(LiftoffRegister dst,
                                                    LiftoffRegister src) {
  Cvtpd2ps(dst.fp(), src.fp());
}

void LiftoffAssembler::emit_i8x16_sconvert_i16x8(LiftoffRegister dst,
                                                 LiftoffRegister lhs,
                                                 LiftoffRegister rhs) {
  liftoff::EmitSimdNonCommutativeBinOp<&Assembler::vpacksswb,
                                       &Assembler::packsswb>(this, dst, lhs,
                                                             rhs);
}

void LiftoffAssembler::emit_i8x16_uconvert_i16x8(LiftoffRegister dst,
                                                 LiftoffRegister lhs,
                                                 LiftoffRegister rhs) {
  liftoff::EmitSimdNonCommutativeBinOp<&Assembler::vpackuswb,
                                       &Assembler::packuswb>(this, dst, lhs,
                                                             rhs);
}

void LiftoffAssembler::emit_i16x8_sconvert_i32x4(LiftoffRegister dst,
                                                 LiftoffRegister lhs,
                                                 LiftoffRegister rhs) {
  liftoff::EmitSimdNonCommutativeBinOp<&Assembler::vpackssdw,
                                       &Assembler::packssdw>(this, dst, lhs,
                                                             rhs);
}

void LiftoffAssembler::emit_i16x8_uconvert_i32x4(LiftoffRegister dst,
                                                 LiftoffRegister lhs,
                                                 LiftoffRegister rhs) {
  liftoff::EmitSimdNonCommutativeBinOp<&Assembler::vpackusdw,
                                       &Assembler::packusdw>(this, dst, lhs,
                                                             rhs, SSE4_1);
}

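// packusdw is the only pack instruction used here that is not SSE2; the
// trailing SSE4_1 argument presumably lets the helper guard the non-AVX path
// with the required CpuFeatureScope (the helper's definition is not shown
// here; this describes its apparent contract).
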
void LiftoffAssembler::emit_i16x8_sconvert_i8x16_low(LiftoffRegister dst,
                                                     LiftoffRegister src) {
  Pmovsxbw(dst.fp(), src.fp());
}

void LiftoffAssembler::emit_i16x8_sconvert_i8x16_high(LiftoffRegister dst,
                                                      LiftoffRegister src) {
  I16x8SConvertI8x16High(dst.fp(), src.fp());
}

void LiftoffAssembler::emit_i16x8_uconvert_i8x16_low(LiftoffRegister dst,
                                                     LiftoffRegister src) {
  Pmovzxbw(dst.fp(), src.fp());
}

void LiftoffAssembler::emit_i16x8_uconvert_i8x16_high(LiftoffRegister dst,
                                                      LiftoffRegister src) {
  I16x8UConvertI8x16High(dst.fp(), src.fp(), kScratchDoubleReg);
}

void LiftoffAssembler::emit_i32x4_sconvert_i16x8_low(LiftoffRegister dst,
                                                     LiftoffRegister src) {
  Pmovsxwd(dst.fp(), src.fp());
}

void LiftoffAssembler::emit_i32x4_sconvert_i16x8_high(LiftoffRegister dst,
                                                      LiftoffRegister src) {
  I32x4SConvertI16x8High(dst.fp(), src.fp());
}

void LiftoffAssembler::emit_i32x4_uconvert_i16x8_low(LiftoffRegister dst,
                                                     LiftoffRegister src) {
  Pmovzxwd(dst.fp(), src.fp());
}

void LiftoffAssembler::emit_i32x4_uconvert_i16x8_high(LiftoffRegister dst,
                                                      LiftoffRegister src) {
  I32x4UConvertI16x8High(dst.fp(), src.fp(), kScratchDoubleReg);
}

void LiftoffAssembler::emit_i32x4_trunc_sat_f64x2_s_zero(LiftoffRegister dst,
                                                         LiftoffRegister src) {
  I32x4TruncSatF64x2SZero(dst.fp(), src.fp(), kScratchDoubleReg,
                          kScratchRegister);
}

void LiftoffAssembler::emit_i32x4_trunc_sat_f64x2_u_zero(LiftoffRegister dst,
                                                         LiftoffRegister src) {
  I32x4TruncSatF64x2UZero(dst.fp(), src.fp(), kScratchDoubleReg,
                          kScratchRegister);
}

void LiftoffAssembler::emit_s128_and_not(LiftoffRegister dst,
                                         LiftoffRegister lhs,
                                         LiftoffRegister rhs) {
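  // andnps computes ~first & second, so swap the operands to get the wasm
  // semantics lhs & ~rhs.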
  liftoff::EmitSimdNonCommutativeBinOp<&Assembler::vandnps, &Assembler::andnps>(
      this, dst, rhs, lhs);
}

void LiftoffAssembler::emit_i8x16_rounding_average_u(LiftoffRegister dst,
                                                     LiftoffRegister lhs,
                                                     LiftoffRegister rhs) {
  liftoff::EmitSimdCommutativeBinOp<&Assembler::vpavgb, &Assembler::pavgb>(
      this, dst, lhs, rhs);
}

void LiftoffAssembler::emit_i16x8_rounding_average_u(LiftoffRegister dst,
                                                     LiftoffRegister lhs,
                                                     LiftoffRegister rhs) {
  liftoff::EmitSimdCommutativeBinOp<&Assembler::vpavgw, &Assembler::pavgw>(
      this, dst, lhs, rhs);
}

void LiftoffAssembler::emit_i8x16_abs(LiftoffRegister dst,
                                      LiftoffRegister src) {
  Pabsb(dst.fp(), src.fp());
}

void LiftoffAssembler::emit_i16x8_abs(LiftoffRegister dst,
                                      LiftoffRegister src) {
  Pabsw(dst.fp(), src.fp());
}

void LiftoffAssembler::emit_i32x4_abs(LiftoffRegister dst,
                                      LiftoffRegister src) {
  Pabsd(dst.fp(), src.fp());
}

void LiftoffAssembler::emit_i64x2_abs(LiftoffRegister dst,
                                      LiftoffRegister src) {
  I64x2Abs(dst.fp(), src.fp(), kScratchDoubleReg);
}

void LiftoffAssembler::emit_i8x16_extract_lane_s(LiftoffRegister dst,
                                                 LiftoffRegister lhs,
                                                 uint8_t imm_lane_idx) {
  Pextrb(dst.gp(), lhs.fp(), imm_lane_idx);
  movsxbl(dst.gp(), dst.gp());
}

void LiftoffAssembler::emit_i8x16_extract_lane_u(LiftoffRegister dst,
                                                 LiftoffRegister lhs,
                                                 uint8_t imm_lane_idx) {
  Pextrb(dst.gp(), lhs.fp(), imm_lane_idx);
}

void LiftoffAssembler::emit_i16x8_extract_lane_s(LiftoffRegister dst,
                                                 LiftoffRegister lhs,
                                                 uint8_t imm_lane_idx) {
  Pextrw(dst.gp(), lhs.fp(), imm_lane_idx);
  movsxwl(dst.gp(), dst.gp());
}

void LiftoffAssembler::emit_i16x8_extract_lane_u(LiftoffRegister dst,
                                                 LiftoffRegister lhs,
                                                 uint8_t imm_lane_idx) {
  Pextrw(dst.gp(), lhs.fp(), imm_lane_idx);
}

void LiftoffAssembler::emit_i32x4_extract_lane(LiftoffRegister dst,
                                               LiftoffRegister lhs,
                                               uint8_t imm_lane_idx) {
  Pextrd(dst.gp(), lhs.fp(), imm_lane_idx);
}

void LiftoffAssembler::emit_i64x2_extract_lane(LiftoffRegister dst,
                                               LiftoffRegister lhs,
                                               uint8_t imm_lane_idx) {
  Pextrq(dst.gp(), lhs.fp(), static_cast<int8_t>(imm_lane_idx));
}

void LiftoffAssembler::emit_f32x4_extract_lane(LiftoffRegister dst,
                                               LiftoffRegister lhs,
                                               uint8_t imm_lane_idx) {
  F32x4ExtractLane(dst.fp(), lhs.fp(), imm_lane_idx);
}

void LiftoffAssembler::emit_f64x2_extract_lane(LiftoffRegister dst,
                                               LiftoffRegister lhs,
                                               uint8_t imm_lane_idx) {
  F64x2ExtractLane(dst.fp(), lhs.fp(), imm_lane_idx);
}

void LiftoffAssembler::emit_i8x16_replace_lane(LiftoffRegister dst,
                                               LiftoffRegister src1,
                                               LiftoffRegister src2,
                                               uint8_t imm_lane_idx) {
  if (CpuFeatures::IsSupported(AVX)) {
    CpuFeatureScope scope(this, AVX);
    vpinsrb(dst.fp(), src1.fp(), src2.gp(), imm_lane_idx);
  } else {
    CpuFeatureScope scope(this, SSE4_1);
    if (dst.fp() != src1.fp()) movaps(dst.fp(), src1.fp());
    pinsrb(dst.fp(), src2.gp(), imm_lane_idx);
  }
}

void LiftoffAssembler::emit_i16x8_replace_lane(LiftoffRegister dst,
                                               LiftoffRegister src1,
                                               LiftoffRegister src2,
                                               uint8_t imm_lane_idx) {
  if (CpuFeatures::IsSupported(AVX)) {
    CpuFeatureScope scope(this, AVX);
    vpinsrw(dst.fp(), src1.fp(), src2.gp(), imm_lane_idx);
  } else {
    if (dst.fp() != src1.fp()) movaps(dst.fp(), src1.fp());
    pinsrw(dst.fp(), src2.gp(), imm_lane_idx);
  }
}

void LiftoffAssembler::emit_i32x4_replace_lane(LiftoffRegister dst,
                                               LiftoffRegister src1,
                                               LiftoffRegister src2,
                                               uint8_t imm_lane_idx) {
  if (CpuFeatures::IsSupported(AVX)) {
    CpuFeatureScope scope(this, AVX);
    vpinsrd(dst.fp(), src1.fp(), src2.gp(), imm_lane_idx);
  } else {
    CpuFeatureScope scope(this, SSE4_1);
    if (dst.fp() != src1.fp()) movaps(dst.fp(), src1.fp());
    pinsrd(dst.fp(), src2.gp(), imm_lane_idx);
  }
}

void LiftoffAssembler::emit_i64x2_replace_lane(LiftoffRegister dst,
                                               LiftoffRegister src1,
                                               LiftoffRegister src2,
                                               uint8_t imm_lane_idx) {
  if (CpuFeatures::IsSupported(AVX)) {
    CpuFeatureScope scope(this, AVX);
    vpinsrq(dst.fp(), src1.fp(), src2.gp(), imm_lane_idx);
  } else {
    CpuFeatureScope scope(this, SSE4_1);
    if (dst.fp() != src1.fp()) movaps(dst.fp(), src1.fp());
    pinsrq(dst.fp(), src2.gp(), imm_lane_idx);
  }
}

void LiftoffAssembler::emit_f32x4_replace_lane(LiftoffRegister dst,
                                               LiftoffRegister src1,
                                               LiftoffRegister src2,
                                               uint8_t imm_lane_idx) {
  if (CpuFeatures::IsSupported(AVX)) {
    CpuFeatureScope scope(this, AVX);
    vinsertps(dst.fp(), src1.fp(), src2.fp(), (imm_lane_idx << 4) & 0x30);
  } else {
    CpuFeatureScope scope(this, SSE4_1);
    if (dst.fp() != src1.fp()) movaps(dst.fp(), src1.fp());
    insertps(dst.fp(), src2.fp(), (imm_lane_idx << 4) & 0x30);
  }
}

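// In the insertps immediate, bits 5:4 select the destination lane, bits 7:6
// select the source lane (0 here, since src2 holds the scalar in lane 0),
// and bits 3:0 are a zero mask; (imm_lane_idx << 4) & 0x30 therefore routes
// lane 0 of src2 into the requested lane without zeroing any others.
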
void LiftoffAssembler::emit_f64x2_replace_lane(LiftoffRegister dst,
                                               LiftoffRegister src1,
                                               LiftoffRegister src2,
                                               uint8_t imm_lane_idx) {
  F64x2ReplaceLane(dst.fp(), src1.fp(), src2.fp(), imm_lane_idx);
}

void LiftoffAssembler::StackCheck(Label* ool_code, Register limit_address) {
  cmpq(rsp, Operand(limit_address, 0));
  j(below_equal, ool_code);
}

void LiftoffAssembler::CallTrapCallbackForTesting() {
  PrepareCallCFunction(0);
  CallCFunction(ExternalReference::wasm_call_trap_callback_for_testing(), 0);
}

void LiftoffAssembler::AssertUnreachable(AbortReason reason) {
  TurboAssembler::AssertUnreachable(reason);
}

void LiftoffAssembler::PushRegisters(LiftoffRegList regs) {
  LiftoffRegList gp_regs = regs & kGpCacheRegList;
  while (!gp_regs.is_empty()) {
    LiftoffRegister reg = gp_regs.GetFirstRegSet();
    pushq(reg.gp());
    gp_regs.clear(reg);
  }
  LiftoffRegList fp_regs = regs & kFpCacheRegList;
  unsigned num_fp_regs = fp_regs.GetNumRegsSet();
  if (num_fp_regs) {
    AllocateStackSpace(num_fp_regs * kSimd128Size);
    unsigned offset = 0;
    while (!fp_regs.is_empty()) {
      LiftoffRegister reg = fp_regs.GetFirstRegSet();
      Movdqu(Operand(rsp, offset), reg.fp());
      fp_regs.clear(reg);
      offset += kSimd128Size;
    }
    DCHECK_EQ(offset, num_fp_regs * kSimd128Size);
  }
}

void LiftoffAssembler::PopRegisters(LiftoffRegList regs) {
  LiftoffRegList fp_regs = regs & kFpCacheRegList;
  unsigned fp_offset = 0;
  while (!fp_regs.is_empty()) {
    LiftoffRegister reg = fp_regs.GetFirstRegSet();
    Movdqu(reg.fp(), Operand(rsp, fp_offset));
    fp_regs.clear(reg);
    fp_offset += kSimd128Size;
  }
  if (fp_offset) addq(rsp, Immediate(fp_offset));
  LiftoffRegList gp_regs = regs & kGpCacheRegList;
  while (!gp_regs.is_empty()) {
    LiftoffRegister reg = gp_regs.GetLastRegSet();
    popq(reg.gp());
    gp_regs.clear(reg);
  }
}

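// PopRegisters mirrors PushRegisters exactly: the fp registers sit on top of
// the stack and are restored first (in the same first-to-last order they
// were stored, since each is addressed by an explicit offset), and the gp
// registers are popped last-to-first to undo the first-to-last pushes.
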
void LiftoffAssembler::RecordSpillsInSafepoint(Safepoint& safepoint,
                                               LiftoffRegList all_spills,
                                               LiftoffRegList ref_spills,
                                               int spill_offset) {
  int spill_space_size = 0;
  while (!all_spills.is_empty()) {
    LiftoffRegister reg = all_spills.GetFirstRegSet();
    if (ref_spills.has(reg)) {
      safepoint.DefinePointerSlot(spill_offset);
    }
    all_spills.clear(reg);
    ++spill_offset;
    spill_space_size += kSystemPointerSize;
  }
  // Record the number of additional spill slots.
  RecordOolSpillSpaceSize(spill_space_size);
}

void LiftoffAssembler::DropStackSlotsAndRet(uint32_t num_stack_slots) {
  DCHECK_LT(num_stack_slots,
            (1 << 16) / kSystemPointerSize);  // 16 bit immediate
  ret(static_cast<int>(num_stack_slots * kSystemPointerSize));
}

void LiftoffAssembler::CallC(const ValueKindSig* sig,
                             const LiftoffRegister* args,
                             const LiftoffRegister* rets,
                             ValueKind out_argument_kind, int stack_bytes,
                             ExternalReference ext_ref) {
  AllocateStackSpace(stack_bytes);

  int arg_bytes = 0;
  for (ValueKind param_kind : sig->parameters()) {
    liftoff::Store(this, Operand(rsp, arg_bytes), *args++, param_kind);
    arg_bytes += element_size_bytes(param_kind);
  }
  DCHECK_LE(arg_bytes, stack_bytes);

  // Pass a pointer to the buffer with the arguments to the C function.
  movq(arg_reg_1, rsp);

  constexpr int kNumCCallArgs = 1;

  // Now call the C function.
  PrepareCallCFunction(kNumCCallArgs);
  CallCFunction(ext_ref, kNumCCallArgs);

  // Move return value to the right register.
  const LiftoffRegister* next_result_reg = rets;
  if (sig->return_count() > 0) {
    DCHECK_EQ(1, sig->return_count());
    constexpr Register kReturnReg = rax;
    if (kReturnReg != next_result_reg->gp()) {
      Move(*next_result_reg, LiftoffRegister(kReturnReg), sig->GetReturn(0));
    }
    ++next_result_reg;
  }

  // Load potential output value from the buffer on the stack.
  if (out_argument_kind != kVoid) {
    liftoff::Load(this, *next_result_reg, Operand(rsp, 0), out_argument_kind);
  }

  addq(rsp, Immediate(stack_bytes));
}

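// CallC thus uses a single-pointer calling convention: all wasm arguments
// are spilled to a stack buffer, the buffer address is passed as the only C
// argument, and (if requested) the callee writes an out-value back to the
// start of the same buffer. Roughly, the callee side looks like something
// such as `int32_t f(Address buffer)`, with the out-value read back from
// `Operand(rsp, 0)` after the call.
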
void LiftoffAssembler::CallNativeWasmCode(Address addr) {
  near_call(addr, RelocInfo::WASM_CALL);
}

void LiftoffAssembler::TailCallNativeWasmCode(Address addr) {
  near_jmp(addr, RelocInfo::WASM_CALL);
}

void LiftoffAssembler::CallIndirect(const ValueKindSig* sig,
                                    compiler::CallDescriptor* call_descriptor,
                                    Register target) {
  if (target == no_reg) {
    popq(kScratchRegister);
    target = kScratchRegister;
  }
  call(target);
}

void LiftoffAssembler::TailCallIndirect(Register target) {
  if (target == no_reg) {
    popq(kScratchRegister);
    target = kScratchRegister;
  }
  jmp(target);
}

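// In both indirect calls above, target == no_reg signals that the call
// target was passed on the stack instead of in a register; popping it into
// kScratchRegister is safe because the scratch register is never part of
// the Liftoff cache register set.
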
void LiftoffAssembler::CallRuntimeStub(WasmCode::RuntimeStubId sid) {
  // A direct call to a wasm runtime stub defined in this module.
  // Just encode the stub index. This will be patched at relocation.
  near_call(static_cast<Address>(sid), RelocInfo::WASM_STUB_CALL);
}

void LiftoffAssembler::AllocateStackSlot(Register addr, uint32_t size) {
  AllocateStackSpace(size);
  movq(addr, rsp);
}

void LiftoffAssembler::DeallocateStackSlot(uint32_t size) {
  addq(rsp, Immediate(size));
}

void LiftoffAssembler::MaybeOSR() {
  cmpq(liftoff::GetOSRTargetSlot(), Immediate(0));
  j(not_equal, static_cast<Address>(WasmCode::kWasmOnStackReplace),
    RelocInfo::WASM_STUB_CALL);
}

void LiftoffAssembler::emit_set_if_nan(Register dst, DoubleRegister src,
                                       ValueKind kind) {
  if (kind == kF32) {
    Ucomiss(src, src);
  } else {
    DCHECK_EQ(kind, kF64);
    Ucomisd(src, src);
  }
  Label ret;
  j(parity_odd, &ret);
  movl(Operand(dst, 0), Immediate(1));
  bind(&ret);
}

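// ucomiss/ucomisd compare the value against itself and set the parity flag
// exactly when the comparison is unordered, i.e. when the value is NaN;
// jumping on parity_odd (PF == 0) therefore skips the store for all non-NaN
// values.
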
void LiftoffAssembler::emit_s128_set_if_nan(Register dst, LiftoffRegister src,
                                            Register tmp_gp,
                                            LiftoffRegister tmp_s128,
                                            ValueKind lane_kind) {
  if (lane_kind == kF32) {
    movaps(tmp_s128.fp(), src.fp());
    cmpunordps(tmp_s128.fp(), tmp_s128.fp());
  } else {
    DCHECK_EQ(lane_kind, kF64);
    movapd(tmp_s128.fp(), src.fp());
    cmpunordpd(tmp_s128.fp(), tmp_s128.fp());
  }
  pmovmskb(tmp_gp, tmp_s128.fp());
  orl(Operand(dst, 0), tmp_gp);
}

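// The SIMD variant works lane-wise: cmpunordps/cmpunordpd sets a lane to all
// ones iff that lane is NaN, pmovmskb collapses the byte sign bits into a
// 16-bit mask in tmp_gp, and or-ing that mask into *dst leaves it nonzero if
// any lane was NaN.
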
void LiftoffStackSlots::Construct(int param_slots) {
  DCHECK_LT(0, slots_.size());
  SortInPushOrder();
  int last_stack_slot = param_slots;
  for (auto& slot : slots_) {
    const int stack_slot = slot.dst_slot_;
    int stack_decrement = (last_stack_slot - stack_slot) * kSystemPointerSize;
    last_stack_slot = stack_slot;
    const LiftoffAssembler::VarState& src = slot.src_;
    DCHECK_LT(0, stack_decrement);
    switch (src.loc()) {
      case LiftoffAssembler::VarState::kStack:
        if (src.kind() == kI32) {
          asm_->AllocateStackSpace(stack_decrement - kSystemPointerSize);
          // Load i32 values to a register first to ensure they are zero
          // extended.
          asm_->movl(kScratchRegister, liftoff::GetStackSlot(slot.src_offset_));
          asm_->pushq(kScratchRegister);
        } else if (src.kind() == kS128) {
          asm_->AllocateStackSpace(stack_decrement - kSimd128Size);
          // Stack slots are addressed via negative offsets from the frame
          // pointer, so the upper half of an s128 value lives at the smaller
          // offset and must be pushed first.
          asm_->pushq(liftoff::GetStackSlot(slot.src_offset_ - 8));
          asm_->pushq(liftoff::GetStackSlot(slot.src_offset_));
        } else {
          asm_->AllocateStackSpace(stack_decrement - kSystemPointerSize);
          // For all other types, just push the whole (8-byte) stack slot.
          // This is also ok for f32 values (even though we copy 4
          // uninitialized bytes), because f32 and f64 values are clearly
          // distinguished in Turbofan, so the uninitialized bytes are never
          // accessed.
          asm_->pushq(liftoff::GetStackSlot(slot.src_offset_));
        }
        break;
      case LiftoffAssembler::VarState::kRegister: {
        int pushed = src.kind() == kS128 ? kSimd128Size : kSystemPointerSize;
        liftoff::push(asm_, src.reg(), src.kind(), stack_decrement - pushed);
        break;
      }
      case LiftoffAssembler::VarState::kIntConst:
        asm_->AllocateStackSpace(stack_decrement - kSystemPointerSize);
        asm_->pushq(Immediate(src.i32_const()));
        break;
    }
  }
}

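// Note on the stack_decrement bookkeeping above: the DCHECK implies slots
// are pushed from the highest slot index down, and the gap between the
// previous slot and the current one (minus whatever the push itself covers)
// is allocated as padding first, so each value lands at exactly its
// dst_slot_ position even when intermediate slots are skipped.
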
#undef RETURN_FALSE_IF_MISSING_CPU_FEATURE

}  // namespace wasm
}  // namespace internal
}  // namespace v8

#endif  // V8_WASM_BASELINE_X64_LIFTOFF_ASSEMBLER_X64_H_