1 // Copyright 2017 the V8 project authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 #ifndef V8_WASM_BASELINE_X64_LIFTOFF_ASSEMBLER_X64_H_
6 #define V8_WASM_BASELINE_X64_LIFTOFF_ASSEMBLER_X64_H_
7
8 #include "src/base/platform/wrappers.h"
9 #include "src/codegen/assembler.h"
10 #include "src/codegen/cpu-features.h"
11 #include "src/codegen/machine-type.h"
12 #include "src/codegen/x64/register-x64.h"
13 #include "src/heap/memory-chunk.h"
14 #include "src/wasm/baseline/liftoff-assembler.h"
15 #include "src/wasm/simd-shuffle.h"
16 #include "src/wasm/wasm-objects.h"
17
18 namespace v8 {
19 namespace internal {
20 namespace wasm {
21
// Bails out of the current function (returning false) if {name} is not
// supported by the host CPU; otherwise enables the feature for the remainder
// of the enclosing scope via a CpuFeatureScope.
#define RETURN_FALSE_IF_MISSING_CPU_FEATURE(name)    \
  if (!CpuFeatures::IsSupported(name)) return false; \
  CpuFeatureScope feature(this, name);
25
26 namespace liftoff {
27
// Maps a platform-independent LiftoffCondition to the corresponding x64
// condition code. The switch is exhaustive over the enum, hence no default
// case (the compiler checks that all enumerators are handled).
inline constexpr Condition ToCondition(LiftoffCondition liftoff_cond) {
  switch (liftoff_cond) {
    case kEqual:
      return equal;
    case kUnequal:
      return not_equal;
    case kSignedLessThan:
      return less;
    case kSignedLessEqual:
      return less_equal;
    case kSignedGreaterThan:
      return greater;
    case kSignedGreaterEqual:
      return greater_equal;
    case kUnsignedLessThan:
      return below;
    case kUnsignedLessEqual:
      return below_equal;
    case kUnsignedGreaterThan:
      return above;
    case kUnsignedGreaterEqual:
      return above_equal;
  }
}
52
// Secondary scratch registers for code sequences that need more than one
// temporary. The static_asserts guarantee that the primary and secondary
// scratch registers are distinct and that neither overlaps Liftoff's cache
// register sets (cache registers hold live values and must not be clobbered).
constexpr Register kScratchRegister2 = r11;
static_assert(kScratchRegister != kScratchRegister2, "collision");
static_assert((kLiftoffAssemblerGpCacheRegs &
               Register::ListOf(kScratchRegister, kScratchRegister2)) == 0,
              "scratch registers must not be used as cache registers");

constexpr DoubleRegister kScratchDoubleReg2 = xmm14;
static_assert(kScratchDoubleReg != kScratchDoubleReg2, "collision");
static_assert((kLiftoffAssemblerFpCacheRegs &
               DoubleRegister::ListOf(kScratchDoubleReg, kScratchDoubleReg2)) ==
                  0,
              "scratch registers must not be used as cache registers");
65
// rbp-8 holds the stack marker, rbp-16 is the instance parameter.
constexpr int kInstanceOffset = 16;

// Liftoff stack slots grow downwards from the frame pointer, so a positive
// slot {offset} addresses rbp - offset.
inline Operand GetStackSlot(int offset) { return Operand(rbp, -offset); }

// TODO(clemensb): Make this a constexpr variable once Operand is constexpr.
inline Operand GetInstanceOperand() { return GetStackSlot(kInstanceOffset); }

// Frame slot holding the on-stack-replacement target address.
inline Operand GetOSRTargetSlot() { return GetStackSlot(kOSRTargetOffset); }
75
// Builds a memory operand for {addr} + {offset} (optional index register) +
// {offset_imm}. If the immediate does not fit into the operand's 32-bit
// displacement, it is materialized into kScratchRegister — callers must not
// rely on kScratchRegister surviving this call.
inline Operand GetMemOp(LiftoffAssembler* assm, Register addr, Register offset,
                        uintptr_t offset_imm) {
  if (is_uint31(offset_imm)) {
    // Immediate fits into the signed 32-bit displacement field.
    int32_t offset_imm32 = static_cast<int32_t>(offset_imm);
    return offset == no_reg ? Operand(addr, offset_imm32)
                            : Operand(addr, offset, times_1, offset_imm32);
  }
  // Offset immediate does not fit in 31 bits.
  Register scratch = kScratchRegister;
  assm->TurboAssembler::Move(scratch, offset_imm);
  if (offset != no_reg) assm->addq(scratch, offset);
  return Operand(addr, scratch, times_1, 0);
}
89
// Emits a load of a value of {kind} from memory operand {src} into register
// {dst}, selecting the mov variant (32-bit, 64-bit, scalar FP, or SIMD) that
// matches the value kind.
inline void Load(LiftoffAssembler* assm, LiftoffRegister dst, Operand src,
                 ValueKind kind) {
  switch (kind) {
    case kI32:
      assm->movl(dst.gp(), src);
      break;
    case kI64:
    case kOptRef:
    case kRef:
    case kRtt:
    case kRttWithDepth:
      // References are loaded as full words here; tagged-field decompression
      // is not needed for stack slots, which hold full pointers.
      assm->movq(dst.gp(), src);
      break;
    case kF32:
      assm->Movss(dst.fp(), src);
      break;
    case kF64:
      assm->Movsd(dst.fp(), src);
      break;
    case kS128:
      assm->Movdqu(dst.fp(), src);
      break;
    default:
      UNREACHABLE();
  }
}
116
// Emits a store of {src} (of {kind}) to memory operand {dst}. Note the
// asymmetry with Load above: reference kinds go through StoreTaggedField
// (which handles pointer compression), while kI64 uses a plain full-width
// movq.
inline void Store(LiftoffAssembler* assm, Operand dst, LiftoffRegister src,
                  ValueKind kind) {
  switch (kind) {
    case kI32:
      assm->movl(dst, src.gp());
      break;
    case kI64:
      assm->movq(dst, src.gp());
      break;
    case kOptRef:
    case kRef:
    case kRtt:
    case kRttWithDepth:
      assm->StoreTaggedField(dst, src.gp());
      break;
    case kF32:
      assm->Movss(dst, src.fp());
      break;
    case kF64:
      assm->Movsd(dst, src.fp());
      break;
    case kS128:
      assm->Movdqu(dst, src.fp());
      break;
    default:
      UNREACHABLE();
  }
}
145
// Pushes {reg} (of {kind}) onto the stack, preceded by {padding} bytes of
// extra stack space. GP values use a real pushq; FP/SIMD values allocate the
// space explicitly and store to the new stack top.
inline void push(LiftoffAssembler* assm, LiftoffRegister reg, ValueKind kind,
                 int padding = 0) {
  switch (kind) {
    case kI32:
    case kI64:
    case kRef:
    case kOptRef:
      assm->AllocateStackSpace(padding);
      assm->pushq(reg.gp());
      break;
    case kF32:
      // f32 still occupies a full pointer-sized slot.
      assm->AllocateStackSpace(kSystemPointerSize + padding);
      assm->Movss(Operand(rsp, 0), reg.fp());
      break;
    case kF64:
      assm->AllocateStackSpace(kSystemPointerSize + padding);
      assm->Movsd(Operand(rsp, 0), reg.fp());
      break;
    case kS128:
      // SIMD values take two pointer-sized slots.
      assm->AllocateStackSpace(kSystemPointerSize * 2 + padding);
      assm->Movdqu(Operand(rsp, 0), reg.fp());
      break;
    default:
      UNREACHABLE();
  }
}

constexpr int kSubSpSize = 7;  // 7 bytes for "subq rsp, <imm32>"
174
175 } // namespace liftoff
176
// Emits a placeholder stack-frame allocation and returns its pc offset so
// PatchPrepareStackFrame can later patch in the real frame size.
int LiftoffAssembler::PrepareStackFrame() {
  int offset = pc_offset();
  // Next we reserve the memory for the whole stack frame. We do not know yet
  // how big the stack frame will be so we just emit a placeholder instruction.
  // PatchPrepareStackFrame will patch this in order to increase the stack
  // appropriately.
  sub_sp_32(0);
  // The placeholder must have the exact fixed size that patching assumes.
  DCHECK_EQ(liftoff::kSubSpSize, pc_offset() - offset);
  return offset;
}
187
// Prepares a tail call by moving the current frame (return address, saved
// frame pointer, and callee stack parameters) up or down by
// {stack_param_delta} slots, then restoring rsp/rbp so the callee reuses this
// frame.
void LiftoffAssembler::PrepareTailCall(int num_callee_stack_params,
                                       int stack_param_delta) {
  // Push the return address and frame pointer to complete the stack frame.
  pushq(Operand(rbp, 8));
  pushq(Operand(rbp, 0));

  // Shift the whole frame upwards.
  // +2 for the return address and frame pointer pushed above.
  const int slot_count = num_callee_stack_params + 2;
  // Copy from the highest slot downwards so overlapping ranges are safe.
  for (int i = slot_count - 1; i >= 0; --i) {
    movq(kScratchRegister, Operand(rsp, i * 8));
    movq(Operand(rbp, (i - stack_param_delta) * 8), kScratchRegister);
  }

  // Set the new stack and frame pointer.
  leaq(rsp, Operand(rbp, -stack_param_delta * 8));
  popq(rbp);
}
205
// Rounds the maximum used spill offset up to pointer size so the frame stays
// pointer-aligned.
void LiftoffAssembler::AlignFrameSize() {
  max_used_spill_offset_ = RoundUp(max_used_spill_offset_, kSystemPointerSize);
}
209
// Patches the placeholder emitted by PrepareStackFrame (at {offset}) with the
// real frame allocation. Small frames get a direct "subq rsp, imm"; large
// frames (>= 4KB) instead jump to out-of-line code emitted here, which
// performs an explicit stack check before allocating.
void LiftoffAssembler::PatchPrepareStackFrame(
    int offset, SafepointTableBuilder* safepoint_table_builder) {
  // The frame_size includes the frame marker and the instance slot. Both are
  // pushed as part of frame construction, so we don't need to allocate memory
  // for them anymore.
  int frame_size = GetTotalFrameSize() - 2 * kSystemPointerSize;
  DCHECK_EQ(0, frame_size % kSystemPointerSize);

  // We can't run out of space when patching, just pass anything big enough to
  // not cause the assembler to try to grow the buffer.
  constexpr int kAvailableSpace = 64;
  Assembler patching_assembler(
      AssemblerOptions{},
      ExternalAssemblerBuffer(buffer_start_ + offset, kAvailableSpace));

  if (V8_LIKELY(frame_size < 4 * KB)) {
    // This is the standard case for small frames: just subtract from SP and be
    // done with it.
    patching_assembler.sub_sp_32(frame_size);
    DCHECK_EQ(liftoff::kSubSpSize, patching_assembler.pc_offset());
    return;
  }

  // The frame size is bigger than 4KB, so we might overflow the available stack
  // space if we first allocate the frame and then do the stack check (we will
  // need some remaining stack space for throwing the exception). That's why we
  // check the available stack space before we allocate the frame. To do this we
  // replace the {__ sub(sp, framesize)} with a jump to OOL code that does this
  // "extended stack check".
  //
  // The OOL code can simply be generated here with the normal assembler,
  // because all other code generation, including OOL code, has already finished
  // when {PatchPrepareStackFrame} is called. The function prologue then jumps
  // to the current {pc_offset()} to execute the OOL code for allocating the
  // large frame.

  // Emit the unconditional branch in the function prologue (from {offset} to
  // {pc_offset()}).
  patching_assembler.jmp_rel(pc_offset() - offset);
  DCHECK_GE(liftoff::kSubSpSize, patching_assembler.pc_offset());
  // Pad the patched region to exactly kSubSpSize bytes.
  patching_assembler.Nop(liftoff::kSubSpSize - patching_assembler.pc_offset());

  // If the frame is bigger than the stack, we throw the stack overflow
  // exception unconditionally. Thereby we can avoid the integer overflow
  // check in the condition code.
  RecordComment("OOL: stack check for large frame");
  Label continuation;
  if (frame_size < FLAG_stack_size * 1024) {
    movq(kScratchRegister,
         FieldOperand(kWasmInstanceRegister,
                      WasmInstanceObject::kRealStackLimitAddressOffset));
    movq(kScratchRegister, Operand(kScratchRegister, 0));
    addq(kScratchRegister, Immediate(frame_size));
    cmpq(rsp, kScratchRegister);
    j(above_equal, &continuation, Label::kNear);
  }

  near_call(wasm::WasmCode::kWasmStackOverflow, RelocInfo::WASM_STUB_CALL);
  // The call will not return; just define an empty safepoint.
  safepoint_table_builder->DefineSafepoint(this);
  AssertUnreachable(AbortReason::kUnexpectedReturnFromWasmTrap);

  bind(&continuation);

  // Now allocate the stack space. Note that this might do more than just
  // decrementing the SP; consult {TurboAssembler::AllocateStackSpace}.
  AllocateStackSpace(frame_size);

  // Jump back to the start of the function, from {pc_offset()} to
  // right after the reserved space for the {__ sub(sp, sp, framesize)} (which
  // is a branch now).
  int func_start_offset = offset + liftoff::kSubSpSize;
  jmp_rel(func_start_offset - pc_offset());
}
284
// No finalization work is needed on x64.
void LiftoffAssembler::FinishCode() {}

// No cleanup work is needed when compilation is aborted on x64.
void LiftoffAssembler::AbortCompilation() {}

// static
// The static portion of the frame ends at the OSR target slot (the deepest
// fixed slot; see GetOSRTargetSlot above).
constexpr int LiftoffAssembler::StaticStackFrameSize() {
  return kOSRTargetOffset;
}
293
SlotSizeForType(ValueKind kind)294 int LiftoffAssembler::SlotSizeForType(ValueKind kind) {
295 return is_reference(kind) ? kSystemPointerSize : element_size_bytes(kind);
296 }
297
// Only reference slots require alignment (they are visited by the GC).
bool LiftoffAssembler::NeedsAlignment(ValueKind kind) {
  return is_reference(kind);
}
301
// Materializes the constant {value} into {reg}. {rmode} carries relocation
// info for patchable constants; only i32/i64 support relocation here.
void LiftoffAssembler::LoadConstant(LiftoffRegister reg, WasmValue value,
                                    RelocInfo::Mode rmode) {
  switch (value.type().kind()) {
    case kI32:
      if (value.to_i32() == 0 && RelocInfo::IsNone(rmode)) {
        // xor is the shortest encoding for zeroing a register; only valid
        // when no relocation needs to be recorded.
        xorl(reg.gp(), reg.gp());
      } else {
        movl(reg.gp(), Immediate(value.to_i32(), rmode));
      }
      break;
    case kI64:
      if (RelocInfo::IsNone(rmode)) {
        // Let TurboAssembler pick the shortest sequence for the immediate.
        TurboAssembler::Move(reg.gp(), value.to_i64());
      } else {
        // Relocatable constants need the full 64-bit immediate encoding.
        movq(reg.gp(), Immediate64(value.to_i64(), rmode));
      }
      break;
    case kF32:
      TurboAssembler::Move(reg.fp(), value.to_f32_boxed().get_bits());
      break;
    case kF64:
      TurboAssembler::Move(reg.fp(), value.to_f64_boxed().get_bits());
      break;
    default:
      UNREACHABLE();
  }
}
329
// Loads the WasmInstanceObject pointer from its fixed frame slot into {dst}.
void LiftoffAssembler::LoadInstanceFromFrame(Register dst) {
  movq(dst, liftoff::GetInstanceOperand());
}

// Loads an untagged field of {size} bytes (1, 4, or 8) from the instance
// object into {dst}. Byte loads are zero-extended.
void LiftoffAssembler::LoadFromInstance(Register dst, Register instance,
                                        int offset, int size) {
  DCHECK_LE(0, offset);
  Operand src{instance, offset};
  switch (size) {
    case 1:
      movzxbl(dst, src);
      break;
    case 4:
      movl(dst, src);
      break;
    case 8:
      movq(dst, src);
      break;
    default:
      UNIMPLEMENTED();
  }
}

// Loads a tagged (possibly compressed) pointer field of the instance.
void LiftoffAssembler::LoadTaggedPointerFromInstance(Register dst,
                                                     Register instance,
                                                     int offset) {
  DCHECK_LE(0, offset);
  LoadTaggedPointerField(dst, Operand(instance, offset));
}

// Loads an external (sandboxed) pointer field from the instance, decoding it
// with {tag}. The isolate root is looked up via the scratch register.
void LiftoffAssembler::LoadExternalPointer(Register dst, Register instance,
                                           int offset, ExternalPointerTag tag,
                                           Register isolate_root) {
  LoadExternalPointerField(dst, FieldOperand(instance, offset), tag,
                           isolate_root,
                           IsolateRootLocation::kInScratchRegister);
}

// Stores the instance pointer into its fixed frame slot.
void LiftoffAssembler::SpillInstance(Register instance) {
  movq(liftoff::GetInstanceOperand(), instance);
}

// Clears the OSR target slot.
void LiftoffAssembler::ResetOSRTarget() {
  movq(liftoff::GetOSRTargetSlot(), Immediate(0));
}

// Reloads the instance pointer from its frame slot into {dst}.
void LiftoffAssembler::FillInstanceInto(Register dst) {
  movq(dst, liftoff::GetInstanceOperand());
}
379
// Loads a tagged pointer from {src_addr} + {offset_reg} + {offset_imm},
// decompressing it if pointer compression is enabled.
void LiftoffAssembler::LoadTaggedPointer(Register dst, Register src_addr,
                                         Register offset_reg,
                                         int32_t offset_imm,
                                         LiftoffRegList pinned) {
  DCHECK_GE(offset_imm, 0);
  if (FLAG_debug_code && offset_reg != no_reg) {
    // The offset register is expected to hold a zero-extended 32-bit value.
    AssertZeroExtended(offset_reg);
  }
  Operand src_op = liftoff::GetMemOp(this, src_addr, offset_reg,
                                     static_cast<uint32_t>(offset_imm));
  LoadTaggedPointerField(dst, src_op);
}

// Loads a full (uncompressed) pointer from {src_addr} + {offset_imm}.
void LiftoffAssembler::LoadFullPointer(Register dst, Register src_addr,
                                       int32_t offset_imm) {
  Operand src_op = liftoff::GetMemOp(this, src_addr, no_reg,
                                     static_cast<uint32_t>(offset_imm));
  movq(dst, src_op);
}
399
// Stores the tagged pointer {src} to {dst_addr} + {offset_reg} + {offset_imm}
// and emits the generational write barrier unless the caller opted out or
// write barriers are globally disabled.
void LiftoffAssembler::StoreTaggedPointer(Register dst_addr,
                                          Register offset_reg,
                                          int32_t offset_imm,
                                          LiftoffRegister src,
                                          LiftoffRegList pinned,
                                          SkipWriteBarrier skip_write_barrier) {
  DCHECK_GE(offset_imm, 0);
  Operand dst_op = liftoff::GetMemOp(this, dst_addr, offset_reg,
                                     static_cast<uint32_t>(offset_imm));
  StoreTaggedField(dst_op, src.gp());

  if (skip_write_barrier || FLAG_disable_write_barriers) return;

  Register scratch = pinned.set(GetUnusedRegister(kGpReg, pinned)).gp();
  Label write_barrier;
  Label exit;
  // Fast path: skip the barrier if the destination page does not track
  // outgoing pointers.
  CheckPageFlag(dst_addr, scratch,
                MemoryChunk::kPointersFromHereAreInterestingMask, not_zero,
                &write_barrier, Label::kNear);
  jmp(&exit, Label::kNear);
  bind(&write_barrier);
  // Smis never need a barrier.
  JumpIfSmi(src.gp(), &exit, Label::kNear);
  if (COMPRESS_POINTERS_BOOL) {
    DecompressTaggedPointer(src.gp(), src.gp());
  }
  // Also skip if the stored object lives on a page nobody is interested in.
  CheckPageFlag(src.gp(), scratch,
                MemoryChunk::kPointersToHereAreInterestingMask, zero, &exit,
                Label::kNear);
  // The record-write stub needs the slot address, not the slot contents.
  leaq(scratch, dst_op);

  CallRecordWriteStubSaveRegisters(
      dst_addr, scratch, RememberedSetAction::kEmit, SaveFPRegsMode::kSave,
      StubCallMode::kCallWasmRuntimeStub);
  bind(&exit);
}
435
// On x64, aligned loads are already atomic, so an atomic load is a regular
// load (the trailing {i64_offset=true} argument matches Load's signature).
void LiftoffAssembler::AtomicLoad(LiftoffRegister dst, Register src_addr,
                                  Register offset_reg, uintptr_t offset_imm,
                                  LoadType type, LiftoffRegList pinned) {
  Load(dst, src_addr, offset_reg, offset_imm, type, pinned, nullptr, true);
}
441
// Emits a memory load of {type} from {src_addr}+{offset_reg}+{offset_imm}
// into {dst}. If {protected_load_pc} is non-null, it receives the pc of the
// emitted load so the trap handler can associate faults with it.
void LiftoffAssembler::Load(LiftoffRegister dst, Register src_addr,
                            Register offset_reg, uintptr_t offset_imm,
                            LoadType type, LiftoffRegList pinned,
                            uint32_t* protected_load_pc, bool is_load_mem,
                            bool i64_offset) {
  if (offset_reg != no_reg && !i64_offset) {
    // 32-bit offsets must be zero-extended to be used in a 64-bit address.
    AssertZeroExtended(offset_reg);
  }
  Operand src_op = liftoff::GetMemOp(this, src_addr, offset_reg, offset_imm);
  // Record the pc of the actual memory access for the trap handler.
  if (protected_load_pc) *protected_load_pc = pc_offset();
  switch (type.value()) {
    case LoadType::kI32Load8U:
    case LoadType::kI64Load8U:
      // Zero-extending a byte to 32 bits implicitly clears the upper half.
      movzxbl(dst.gp(), src_op);
      break;
    case LoadType::kI32Load8S:
      movsxbl(dst.gp(), src_op);
      break;
    case LoadType::kI64Load8S:
      movsxbq(dst.gp(), src_op);
      break;
    case LoadType::kI32Load16U:
    case LoadType::kI64Load16U:
      movzxwl(dst.gp(), src_op);
      break;
    case LoadType::kI32Load16S:
      movsxwl(dst.gp(), src_op);
      break;
    case LoadType::kI64Load16S:
      movsxwq(dst.gp(), src_op);
      break;
    case LoadType::kI32Load:
    case LoadType::kI64Load32U:
      // 32-bit moves zero the upper 32 bits on x64.
      movl(dst.gp(), src_op);
      break;
    case LoadType::kI64Load32S:
      movsxlq(dst.gp(), src_op);
      break;
    case LoadType::kI64Load:
      movq(dst.gp(), src_op);
      break;
    case LoadType::kF32Load:
      Movss(dst.fp(), src_op);
      break;
    case LoadType::kF64Load:
      Movsd(dst.fp(), src_op);
      break;
    case LoadType::kS128Load:
      Movdqu(dst.fp(), src_op);
      break;
  }
}
494
// Emits a memory store of {src} (as {type}) to {dst_addr}+{offset_reg}+
// {offset_imm}. If {protected_store_pc} is non-null, it receives the pc of
// the emitted store for the trap handler.
void LiftoffAssembler::Store(Register dst_addr, Register offset_reg,
                             uintptr_t offset_imm, LiftoffRegister src,
                             StoreType type, LiftoffRegList /* pinned */,
                             uint32_t* protected_store_pc, bool is_store_mem) {
  Operand dst_op = liftoff::GetMemOp(this, dst_addr, offset_reg, offset_imm);
  // Record the pc of the actual memory access for the trap handler.
  if (protected_store_pc) *protected_store_pc = pc_offset();
  switch (type.value()) {
    case StoreType::kI32Store8:
    case StoreType::kI64Store8:
      movb(dst_op, src.gp());
      break;
    case StoreType::kI32Store16:
    case StoreType::kI64Store16:
      movw(dst_op, src.gp());
      break;
    case StoreType::kI32Store:
    case StoreType::kI64Store32:
      movl(dst_op, src.gp());
      break;
    case StoreType::kI64Store:
      movq(dst_op, src.gp());
      break;
    case StoreType::kF32Store:
      Movss(dst_op, src.fp());
      break;
    case StoreType::kF64Store:
      Movsd(dst_op, src.fp());
      break;
    case StoreType::kS128Store:
      Movdqu(dst_op, src.fp());
      break;
  }
}
528
// Emits a sequentially-consistent store by using xchg (xchg with a memory
// operand is implicitly locked on x86). Because xchg also writes back into
// the source register, {src} is first copied to the scratch register if its
// value is still needed by the register allocator.
void LiftoffAssembler::AtomicStore(Register dst_addr, Register offset_reg,
                                   uintptr_t offset_imm, LiftoffRegister src,
                                   StoreType type, LiftoffRegList pinned) {
  Operand dst_op = liftoff::GetMemOp(this, dst_addr, offset_reg, offset_imm);
  Register src_reg = src.gp();
  if (cache_state()->is_used(src)) {
    // Protect the live value in {src}: xchg would clobber it.
    movq(kScratchRegister, src_reg);
    src_reg = kScratchRegister;
  }
  switch (type.value()) {
    case StoreType::kI32Store8:
    case StoreType::kI64Store8:
      xchgb(src_reg, dst_op);
      break;
    case StoreType::kI32Store16:
    case StoreType::kI64Store16:
      xchgw(src_reg, dst_op);
      break;
    case StoreType::kI32Store:
    case StoreType::kI64Store32:
      xchgl(src_reg, dst_op);
      break;
    case StoreType::kI64Store:
      xchgq(src_reg, dst_op);
      break;
    default:
      UNREACHABLE();
  }
}
558
// Atomic add: emits "lock xadd", which atomically adds {value} to memory and
// leaves the previous memory value in the {value} register; that old value is
// then moved (with zero-extension for narrow types) into {result}.
void LiftoffAssembler::AtomicAdd(Register dst_addr, Register offset_reg,
                                 uintptr_t offset_imm, LiftoffRegister value,
                                 LiftoffRegister result, StoreType type) {
  DCHECK(!cache_state()->is_used(result));
  if (cache_state()->is_used(value)) {
    // We cannot overwrite {value}, but the {value} register is changed in the
    // code we generate. Therefore we copy {value} to {result} and use the
    // {result} register in the code below.
    movq(result.gp(), value.gp());
    value = result;
  }
  Operand dst_op = liftoff::GetMemOp(this, dst_addr, offset_reg, offset_imm);
  // lock prefix applies to the following xadd instruction.
  lock();
  switch (type.value()) {
    case StoreType::kI32Store8:
    case StoreType::kI64Store8:
      xaddb(dst_op, value.gp());
      // xadd left the old memory value in {value}; zero-extend into {result}.
      movzxbq(result.gp(), value.gp());
      break;
    case StoreType::kI32Store16:
    case StoreType::kI64Store16:
      xaddw(dst_op, value.gp());
      movzxwq(result.gp(), value.gp());
      break;
    case StoreType::kI32Store:
    case StoreType::kI64Store32:
      xaddl(dst_op, value.gp());
      if (value != result) {
        movq(result.gp(), value.gp());
      }
      break;
    case StoreType::kI64Store:
      xaddq(dst_op, value.gp());
      if (value != result) {
        movq(result.gp(), value.gp());
      }
      break;
    default:
      UNREACHABLE();
  }
}
600
// Atomic subtract: there is no "lock xsub", so the value is negated first and
// then added with "lock xadd". The old memory value ends up in the {value}
// register and is moved (zero-extended for narrow types) into {result}.
void LiftoffAssembler::AtomicSub(Register dst_addr, Register offset_reg,
                                 uintptr_t offset_imm, LiftoffRegister value,
                                 LiftoffRegister result, StoreType type) {
  DCHECK(!cache_state()->is_used(result));
  if (cache_state()->is_used(value)) {
    // We cannot overwrite {value}, but the {value} register is changed in the
    // code we generate. Therefore we copy {value} to {result} and use the
    // {result} register in the code below.
    movq(result.gp(), value.gp());
    value = result;
  }
  Operand dst_op = liftoff::GetMemOp(this, dst_addr, offset_reg, offset_imm);
  switch (type.value()) {
    case StoreType::kI32Store8:
    case StoreType::kI64Store8:
      negb(value.gp());
      lock();
      xaddb(dst_op, value.gp());
      movzxbq(result.gp(), value.gp());
      break;
    case StoreType::kI32Store16:
    case StoreType::kI64Store16:
      negw(value.gp());
      lock();
      xaddw(dst_op, value.gp());
      movzxwq(result.gp(), value.gp());
      break;
    case StoreType::kI32Store:
    case StoreType::kI64Store32:
      negl(value.gp());
      lock();
      xaddl(dst_op, value.gp());
      if (value != result) {
        movq(result.gp(), value.gp());
      }
      break;
    case StoreType::kI64Store:
      negq(value.gp());
      lock();
      xaddq(dst_op, value.gp());
      if (value != result) {
        movq(result.gp(), value.gp());
      }
      break;
    default:
      UNREACHABLE();
  }
}
649
650 namespace liftoff {
651 #define __ lasm->
652
// Generic atomic read-modify-write via a cmpxchg retry loop: load the old
// value into rax, apply {opl}/{opq} (32-/64-bit variant of the operation) on
// a scratch copy, and attempt "lock cmpxchg"; retry until no other thread
// changed the memory in between. The old value (in rax) becomes the result.
inline void AtomicBinop(LiftoffAssembler* lasm,
                        void (Assembler::*opl)(Register, Register),
                        void (Assembler::*opq)(Register, Register),
                        Register dst_addr, Register offset_reg,
                        uintptr_t offset_imm, LiftoffRegister value,
                        LiftoffRegister result, StoreType type) {
  DCHECK(!__ cache_state()->is_used(result));
  Register value_reg = value.gp();
  // The cmpxchg instruction uses rax to store the old value of the
  // compare-exchange primitive. Therefore we have to spill the register and
  // move any use to another register.
  LiftoffRegList pinned =
      LiftoffRegList::ForRegs(dst_addr, offset_reg, value_reg);
  __ ClearRegister(rax, {&dst_addr, &offset_reg, &value_reg}, pinned);
  Operand dst_op = liftoff::GetMemOp(lasm, dst_addr, offset_reg, offset_imm);

  switch (type.value()) {
    case StoreType::kI32Store8:
    case StoreType::kI64Store8: {
      Label binop;
      // Clear rax first: the byte load below only writes the low 8 bits.
      __ xorq(rax, rax);
      __ movb(rax, dst_op);
      __ bind(&binop);
      // Work on a copy so rax keeps the expected old value for cmpxchg.
      __ movl(kScratchRegister, rax);
      (lasm->*opl)(kScratchRegister, value_reg);
      __ lock();
      __ cmpxchgb(dst_op, kScratchRegister);
      // cmpxchg failed (memory changed); rax now holds the fresh value.
      __ j(not_equal, &binop);
      break;
    }
    case StoreType::kI32Store16:
    case StoreType::kI64Store16: {
      Label binop;
      // Clear rax first: the word load below only writes the low 16 bits.
      __ xorq(rax, rax);
      __ movw(rax, dst_op);
      __ bind(&binop);
      __ movl(kScratchRegister, rax);
      (lasm->*opl)(kScratchRegister, value_reg);
      __ lock();
      __ cmpxchgw(dst_op, kScratchRegister);
      __ j(not_equal, &binop);
      break;
    }
    case StoreType::kI32Store:
    case StoreType::kI64Store32: {
      Label binop;
      // movl zero-extends, so no explicit clearing of rax is needed.
      __ movl(rax, dst_op);
      __ bind(&binop);
      __ movl(kScratchRegister, rax);
      (lasm->*opl)(kScratchRegister, value_reg);
      __ lock();
      __ cmpxchgl(dst_op, kScratchRegister);
      __ j(not_equal, &binop);
      break;
    }
    case StoreType::kI64Store: {
      Label binop;
      __ movq(rax, dst_op);
      __ bind(&binop);
      __ movq(kScratchRegister, rax);
      (lasm->*opq)(kScratchRegister, value_reg);
      __ lock();
      __ cmpxchgq(dst_op, kScratchRegister);
      __ j(not_equal, &binop);
      break;
    }
    default:
      UNREACHABLE();
  }

  // The loop leaves the old memory value in rax.
  if (result.gp() != rax) {
    __ movq(result.gp(), rax);
  }
}
727 #undef __
728 } // namespace liftoff
729
// Atomic bitwise AND, implemented via the generic cmpxchg loop above.
void LiftoffAssembler::AtomicAnd(Register dst_addr, Register offset_reg,
                                 uintptr_t offset_imm, LiftoffRegister value,
                                 LiftoffRegister result, StoreType type) {
  liftoff::AtomicBinop(this, &Assembler::andl, &Assembler::andq, dst_addr,
                       offset_reg, offset_imm, value, result, type);
}

// Atomic bitwise OR, implemented via the generic cmpxchg loop above.
void LiftoffAssembler::AtomicOr(Register dst_addr, Register offset_reg,
                                uintptr_t offset_imm, LiftoffRegister value,
                                LiftoffRegister result, StoreType type) {
  liftoff::AtomicBinop(this, &Assembler::orl, &Assembler::orq, dst_addr,
                       offset_reg, offset_imm, value, result, type);
}

// Atomic bitwise XOR, implemented via the generic cmpxchg loop above.
void LiftoffAssembler::AtomicXor(Register dst_addr, Register offset_reg,
                                 uintptr_t offset_imm, LiftoffRegister value,
                                 LiftoffRegister result, StoreType type) {
  liftoff::AtomicBinop(this, &Assembler::xorl, &Assembler::xorq, dst_addr,
                       offset_reg, offset_imm, value, result, type);
}
750
// Atomic exchange: xchg swaps {value} with memory (implicitly locked on x86)
// and leaves the old memory value in the {value} register, which is then
// moved (zero-extended for narrow types) into {result}.
void LiftoffAssembler::AtomicExchange(Register dst_addr, Register offset_reg,
                                      uintptr_t offset_imm,
                                      LiftoffRegister value,
                                      LiftoffRegister result, StoreType type) {
  DCHECK(!cache_state()->is_used(result));
  if (cache_state()->is_used(value)) {
    // We cannot overwrite {value}, but the {value} register is changed in the
    // code we generate. Therefore we copy {value} to {result} and use the
    // {result} register in the code below.
    movq(result.gp(), value.gp());
    value = result;
  }
  Operand dst_op = liftoff::GetMemOp(this, dst_addr, offset_reg, offset_imm);
  switch (type.value()) {
    case StoreType::kI32Store8:
    case StoreType::kI64Store8:
      xchgb(value.gp(), dst_op);
      // Zero-extend the old byte value into the 64-bit result.
      movzxbq(result.gp(), value.gp());
      break;
    case StoreType::kI32Store16:
    case StoreType::kI64Store16:
      xchgw(value.gp(), dst_op);
      movzxwq(result.gp(), value.gp());
      break;
    case StoreType::kI32Store:
    case StoreType::kI64Store32:
      xchgl(value.gp(), dst_op);
      if (value != result) {
        movq(result.gp(), value.gp());
      }
      break;
    case StoreType::kI64Store:
      xchgq(value.gp(), dst_op);
      if (value != result) {
        movq(result.gp(), value.gp());
      }
      break;
    default:
      UNREACHABLE();
  }
}
792
// Atomic compare-exchange via "lock cmpxchg": rax must hold {expected}; on
// completion rax holds the old memory value, which is moved (zero-extended
// for narrow types) into {result}.
void LiftoffAssembler::AtomicCompareExchange(
    Register dst_addr, Register offset_reg, uintptr_t offset_imm,
    LiftoffRegister expected, LiftoffRegister new_value, LiftoffRegister result,
    StoreType type) {
  Register value_reg = new_value.gp();
  // The cmpxchg instruction uses rax to store the old value of the
  // compare-exchange primitive. Therefore we have to spill the register and
  // move any use to another register.
  LiftoffRegList pinned =
      LiftoffRegList::ForRegs(dst_addr, offset_reg, expected, value_reg);
  ClearRegister(rax, {&dst_addr, &offset_reg, &value_reg}, pinned);
  if (expected.gp() != rax) {
    movq(rax, expected.gp());
  }

  Operand dst_op = liftoff::GetMemOp(this, dst_addr, offset_reg, offset_imm);

  // lock prefix applies to the cmpxchg emitted in the switch below.
  lock();
  switch (type.value()) {
    case StoreType::kI32Store8:
    case StoreType::kI64Store8: {
      cmpxchgb(dst_op, value_reg);
      movzxbq(result.gp(), rax);
      break;
    }
    case StoreType::kI32Store16:
    case StoreType::kI64Store16: {
      cmpxchgw(dst_op, value_reg);
      movzxwq(result.gp(), rax);
      break;
    }
    case StoreType::kI32Store: {
      cmpxchgl(dst_op, value_reg);
      if (result.gp() != rax) {
        movl(result.gp(), rax);
      }
      break;
    }
    case StoreType::kI64Store32: {
      cmpxchgl(dst_op, value_reg);
      // Zero extension.
      movl(result.gp(), rax);
      break;
    }
    case StoreType::kI64Store: {
      cmpxchgq(dst_op, value_reg);
      if (result.gp() != rax) {
        movq(result.gp(), rax);
      }
      break;
    }
    default:
      UNREACHABLE();
  }
}
848
// Full memory fence (mfence orders all prior loads and stores).
void LiftoffAssembler::AtomicFence() { mfence(); }
850
// Loads a stack parameter from the caller's frame. Slot {caller_slot_idx}
// lives above the saved rbp (+1 skips the slot holding the return address
// side of the frame boundary).
void LiftoffAssembler::LoadCallerFrameSlot(LiftoffRegister dst,
                                           uint32_t caller_slot_idx,
                                           ValueKind kind) {
  Operand src(rbp, kSystemPointerSize * (caller_slot_idx + 1));
  liftoff::Load(this, dst, src, kind);
}

// Stores a stack parameter into the caller's frame (for tail calls / returns
// via the stack).
void LiftoffAssembler::StoreCallerFrameSlot(LiftoffRegister src,
                                            uint32_t caller_slot_idx,
                                            ValueKind kind) {
  Operand dst(rbp, kSystemPointerSize * (caller_slot_idx + 1));
  liftoff::Store(this, dst, src, kind);
}

// Loads a return value that a callee left on the stack, at {offset} from rsp.
void LiftoffAssembler::LoadReturnStackSlot(LiftoffRegister reg, int offset,
                                           ValueKind kind) {
  Operand src(rsp, offset);
  liftoff::Load(this, reg, src, kind);
}
870
// Copies a stack slot to another stack slot through the scratch register,
// using a 4-byte or 8-byte move depending on the value kind's element size.
void LiftoffAssembler::MoveStackValue(uint32_t dst_offset, uint32_t src_offset,
                                      ValueKind kind) {
  DCHECK_NE(dst_offset, src_offset);
  Operand dst = liftoff::GetStackSlot(dst_offset);
  Operand src = liftoff::GetStackSlot(src_offset);
  if (element_size_log2(kind) == 2) {
    // 4-byte values (e.g. i32).
    movl(kScratchRegister, src);
    movl(dst, kScratchRegister);
  } else {
    // Everything else here must be 8 bytes wide.
    DCHECK_EQ(3, element_size_log2(kind));
    movq(kScratchRegister, src);
    movq(dst, kScratchRegister);
  }
}
885
Move(Register dst,Register src,ValueKind kind)886 void LiftoffAssembler::Move(Register dst, Register src, ValueKind kind) {
887 DCHECK_NE(dst, src);
888 if (kind == kI32) {
889 movl(dst, src);
890 } else {
891 DCHECK(kI64 == kind || is_reference(kind));
892 movq(dst, src);
893 }
894 }
895
// Register-to-register move for FP/SIMD values, selecting the move width by
// value kind (f32 scalar, f64 scalar, or full 128-bit vector).
void LiftoffAssembler::Move(DoubleRegister dst, DoubleRegister src,
                            ValueKind kind) {
  DCHECK_NE(dst, src);
  if (kind == kF32) {
    Movss(dst, src);
  } else if (kind == kF64) {
    Movsd(dst, src);
  } else {
    DCHECK_EQ(kS128, kind);
    Movapd(dst, src);
  }
}
908
// Spills register {reg} (of {kind}) into the stack slot at {offset}, and
// records the offset so the frame is sized to include it.
void LiftoffAssembler::Spill(int offset, LiftoffRegister reg, ValueKind kind) {
  RecordUsedSpillOffset(offset);
  Operand dst = liftoff::GetStackSlot(offset);
  switch (kind) {
    case kI32:
      movl(dst, reg.gp());
      break;
    case kI64:
    case kOptRef:
    case kRef:
    case kRtt:
    case kRttWithDepth:
      // Stack slots hold full (uncompressed) pointers, so store 64 bits.
      movq(dst, reg.gp());
      break;
    case kF32:
      Movss(dst, reg.fp());
      break;
    case kF64:
      Movsd(dst, reg.fp());
      break;
    case kS128:
      Movdqu(dst, reg.fp());
      break;
    default:
      UNREACHABLE();
  }
}
936
// Spills a compile-time constant to the stack slot at {offset}.
void LiftoffAssembler::Spill(int offset, WasmValue value) {
  RecordUsedSpillOffset(offset);
  Operand dst = liftoff::GetStackSlot(offset);
  switch (value.type().kind()) {
    case kI32:
      movl(dst, Immediate(value.to_i32()));
      break;
    case kI64: {
      if (is_int32(value.to_i64())) {
        // Sign extend low word.
        movq(dst, Immediate(static_cast<int32_t>(value.to_i64())));
      } else if (is_uint32(value.to_i64())) {
        // Zero extend low word.
        movl(kScratchRegister, Immediate(static_cast<int32_t>(value.to_i64())));
        movq(dst, kScratchRegister);
      } else {
        // Full 64-bit constant: there is no 64-bit store-immediate on x64, so
        // materialize it in the scratch register first.
        movq(kScratchRegister, value.to_i64());
        movq(dst, kScratchRegister);
      }
      break;
    }
    default:
      // We do not track f32 and f64 constants, hence they are unreachable.
      UNREACHABLE();
  }
}
963
// Reloads a spilled value from the stack slot at {offset} into {reg}.
void LiftoffAssembler::Fill(LiftoffRegister reg, int offset, ValueKind kind) {
  liftoff::Load(this, reg, liftoff::GetStackSlot(offset), kind);
}
967
// Filling half of an i64 only happens on 32-bit platforms that use register
// pairs; on x64 an i64 always fits in a single register.
void LiftoffAssembler::FillI64Half(Register, int offset, RegPairHalf) {
  UNREACHABLE();
}
971
// Zeroes the stack region of {size} bytes ending at offset {start}.
void LiftoffAssembler::FillStackSlotsWithZero(int start, int size) {
  DCHECK_LT(0, size);
  RecordUsedSpillOffset(start + size);

  if (size <= 3 * kStackSlotSize) {
    // Special straight-line code for up to three slots
    // (7-10 bytes per slot: REX C7 <1-4 bytes op> <4 bytes imm>),
    // and a movl (6-9 byte) when size % 8 != 0.
    uint32_t remainder = size;
    for (; remainder >= kStackSlotSize; remainder -= kStackSlotSize) {
      movq(liftoff::GetStackSlot(start + remainder), Immediate(0));
    }
    DCHECK(remainder == 4 || remainder == 0);
    if (remainder) {
      movl(liftoff::GetStackSlot(start + remainder), Immediate(0));
    }
  } else {
    // General case for bigger counts.
    // This sequence takes 19-22 bytes (3 for pushes, 4-7 for lea, 2 for xor, 5
    // for mov, 2 for repstosl, 3 for pops).
    // Save the registers clobbered by rep stos.
    pushq(rax);
    pushq(rcx);
    pushq(rdi);
    // rdi points to the lowest address of the region (stack grows down).
    leaq(rdi, liftoff::GetStackSlot(start + size));
    xorl(rax, rax);
    // Convert size (bytes) to doublewords (4-bytes).
    movl(rcx, Immediate(size / 4));
    repstosl();
    popq(rdi);
    popq(rcx);
    popq(rax);
  }
}
1005
emit_i32_add(Register dst,Register lhs,Register rhs)1006 void LiftoffAssembler::emit_i32_add(Register dst, Register lhs, Register rhs) {
1007 if (lhs != dst) {
1008 leal(dst, Operand(lhs, rhs, times_1, 0));
1009 } else {
1010 addl(dst, rhs);
1011 }
1012 }
1013
emit_i32_addi(Register dst,Register lhs,int32_t imm)1014 void LiftoffAssembler::emit_i32_addi(Register dst, Register lhs, int32_t imm) {
1015 if (lhs != dst) {
1016 leal(dst, Operand(lhs, imm));
1017 } else {
1018 addl(dst, Immediate(imm));
1019 }
1020 }
1021
emit_i32_sub(Register dst,Register lhs,Register rhs)1022 void LiftoffAssembler::emit_i32_sub(Register dst, Register lhs, Register rhs) {
1023 if (dst != rhs) {
1024 // Default path.
1025 if (dst != lhs) movl(dst, lhs);
1026 subl(dst, rhs);
1027 } else if (lhs == rhs) {
1028 // Degenerate case.
1029 xorl(dst, dst);
1030 } else {
1031 // Emit {dst = lhs + -rhs} if dst == rhs.
1032 negl(dst);
1033 addl(dst, lhs);
1034 }
1035 }
1036
// 32-bit subtract-immediate: dst = lhs - imm.
void LiftoffAssembler::emit_i32_subi(Register dst, Register lhs, int32_t imm) {
  if (dst != lhs) {
    // We'll have to implement an UB-safe version if we need this corner case.
    DCHECK_NE(imm, kMinInt);
    // Non-destructive form: dst = lhs + (-imm) via lea.
    leal(dst, Operand(lhs, -imm));
  } else {
    subl(dst, Immediate(imm));
  }
}
1046
1047 namespace liftoff {
// Emits a commutative binary operation {dst = lhs op rhs}, exploiting
// commutativity to avoid a register move when {dst} aliases {rhs}.
template <void (Assembler::*op)(Register, Register),
          void (Assembler::*mov)(Register, Register)>
void EmitCommutativeBinOp(LiftoffAssembler* assm, Register dst, Register lhs,
                          Register rhs) {
  if (dst == rhs) {
    (assm->*op)(dst, lhs);
  } else {
    if (dst != lhs) (assm->*mov)(dst, lhs);
    (assm->*op)(dst, rhs);
  }
}
1059
// Immediate variant of {EmitCommutativeBinOp}: dst = lhs op imm.
template <void (Assembler::*op)(Register, Immediate),
          void (Assembler::*mov)(Register, Register)>
void EmitCommutativeBinOpImm(LiftoffAssembler* assm, Register dst, Register lhs,
                             int32_t imm) {
  if (dst != lhs) (assm->*mov)(dst, lhs);
  (assm->*op)(dst, Immediate(imm));
}
1067
1068 } // namespace liftoff
1069
// 32-bit multiplication (commutative, so operand order is free).
void LiftoffAssembler::emit_i32_mul(Register dst, Register lhs, Register rhs) {
  liftoff::EmitCommutativeBinOp<&Assembler::imull, &Assembler::movl>(this, dst,
                                                                     lhs, rhs);
}
1074
1075 namespace liftoff {
// Whether {EmitIntDivOrRem} computes the quotient or the remainder.
enum class DivOrRem : uint8_t { kDiv, kRem };
// Emits an integer division or remainder, including the required wasm traps.
// {type} (int32_t/uint32_t/int64_t/uint64_t) selects width and signedness.
template <typename type, DivOrRem div_or_rem>
void EmitIntDivOrRem(LiftoffAssembler* assm, Register dst, Register lhs,
                     Register rhs, Label* trap_div_by_zero,
                     Label* trap_div_unrepresentable) {
  // Only signed division can produce the unrepresentable result kMinInt / -1.
  constexpr bool needs_unrepresentable_check =
      std::is_signed<type>::value && div_or_rem == DivOrRem::kDiv;
  constexpr bool special_case_minus_1 =
      std::is_signed<type>::value && div_or_rem == DivOrRem::kRem;
  DCHECK_EQ(needs_unrepresentable_check, trap_div_unrepresentable != nullptr);

// Dispatch to the 32-bit ("l") or 64-bit ("q") instruction variant depending
// on the width of {type}.
#define iop(name, ...)            \
  do {                            \
    if (sizeof(type) == 4) {      \
      assm->name##l(__VA_ARGS__); \
    } else {                      \
      assm->name##q(__VA_ARGS__); \
    }                             \
  } while (false)

  // For division, the lhs is always taken from {edx:eax}. Thus, make sure that
  // these registers are unused. If {rhs} is stored in one of them, move it to
  // another temporary register.
  // Do all this before any branch, such that the code is executed
  // unconditionally, as the cache state will also be modified unconditionally.
  assm->SpillRegisters(rdx, rax);
  if (rhs == rax || rhs == rdx) {
    iop(mov, kScratchRegister, rhs);
    rhs = kScratchRegister;
  }

  // Check for division by zero.
  iop(test, rhs, rhs);
  assm->j(zero, trap_div_by_zero);

  Label done;
  if (needs_unrepresentable_check) {
    // Check for {kMinInt / -1}. This is unrepresentable.
    Label do_div;
    iop(cmp, rhs, Immediate(-1));
    assm->j(not_equal, &do_div);
    // {lhs} is min int if {lhs - 1} overflows.
    iop(cmp, lhs, Immediate(1));
    assm->j(overflow, trap_div_unrepresentable);
    assm->bind(&do_div);
  } else if (special_case_minus_1) {
    // {lhs % -1} is always 0 (needs to be special cased because {kMinInt / -1}
    // cannot be computed).
    Label do_rem;
    iop(cmp, rhs, Immediate(-1));
    assm->j(not_equal, &do_rem);
    // clang-format off
    // (conflicts with presubmit checks because it is confused about "xor")
    iop(xor, dst, dst);
    // clang-format on
    assm->jmp(&done);
    assm->bind(&do_rem);
  }

  // Now move {lhs} into {eax}, then zero-extend or sign-extend into {edx}, then
  // do the division.
  if (lhs != rax) iop(mov, rax, lhs);
  if (std::is_same<int32_t, type>::value) {  // i32
    assm->cdq();
    assm->idivl(rhs);
  } else if (std::is_same<uint32_t, type>::value) {  // u32
    assm->xorl(rdx, rdx);
    assm->divl(rhs);
  } else if (std::is_same<int64_t, type>::value) {  // i64
    assm->cqo();
    assm->idivq(rhs);
  } else {  // u64
    assm->xorq(rdx, rdx);
    assm->divq(rhs);
  }

  // Move back the result (in {eax} or {edx}) into the {dst} register.
  constexpr Register kResultReg = div_or_rem == DivOrRem::kDiv ? rax : rdx;
  if (dst != kResultReg) {
    iop(mov, dst, kResultReg);
  }
  if (special_case_minus_1) assm->bind(&done);
}
1159 } // namespace liftoff
1160
// i32 signed division; traps on zero divisor and on kMinInt / -1.
void LiftoffAssembler::emit_i32_divs(Register dst, Register lhs, Register rhs,
                                     Label* trap_div_by_zero,
                                     Label* trap_div_unrepresentable) {
  liftoff::EmitIntDivOrRem<int32_t, liftoff::DivOrRem::kDiv>(
      this, dst, lhs, rhs, trap_div_by_zero, trap_div_unrepresentable);
}
1167
// i32 unsigned division; traps on zero divisor.
void LiftoffAssembler::emit_i32_divu(Register dst, Register lhs, Register rhs,
                                     Label* trap_div_by_zero) {
  liftoff::EmitIntDivOrRem<uint32_t, liftoff::DivOrRem::kDiv>(
      this, dst, lhs, rhs, trap_div_by_zero, nullptr);
}
1173
// i32 signed remainder; traps on zero divisor (lhs % -1 is special-cased to 0
// inside the helper).
void LiftoffAssembler::emit_i32_rems(Register dst, Register lhs, Register rhs,
                                     Label* trap_div_by_zero) {
  liftoff::EmitIntDivOrRem<int32_t, liftoff::DivOrRem::kRem>(
      this, dst, lhs, rhs, trap_div_by_zero, nullptr);
}
1179
// i32 unsigned remainder; traps on zero divisor.
void LiftoffAssembler::emit_i32_remu(Register dst, Register lhs, Register rhs,
                                     Label* trap_div_by_zero) {
  liftoff::EmitIntDivOrRem<uint32_t, liftoff::DivOrRem::kRem>(
      this, dst, lhs, rhs, trap_div_by_zero, nullptr);
}
1185
// i32 bitwise and.
void LiftoffAssembler::emit_i32_and(Register dst, Register lhs, Register rhs) {
  liftoff::EmitCommutativeBinOp<&Assembler::andl, &Assembler::movl>(this, dst,
                                                                    lhs, rhs);
}
1190
// i32 bitwise and with immediate.
void LiftoffAssembler::emit_i32_andi(Register dst, Register lhs, int32_t imm) {
  liftoff::EmitCommutativeBinOpImm<&Assembler::andl, &Assembler::movl>(
      this, dst, lhs, imm);
}
1195
// i32 bitwise or.
void LiftoffAssembler::emit_i32_or(Register dst, Register lhs, Register rhs) {
  liftoff::EmitCommutativeBinOp<&Assembler::orl, &Assembler::movl>(this, dst,
                                                                   lhs, rhs);
}
1200
// i32 bitwise or with immediate.
void LiftoffAssembler::emit_i32_ori(Register dst, Register lhs, int32_t imm) {
  liftoff::EmitCommutativeBinOpImm<&Assembler::orl, &Assembler::movl>(this, dst,
                                                                      lhs, imm);
}
1205
// i32 bitwise xor.
void LiftoffAssembler::emit_i32_xor(Register dst, Register lhs, Register rhs) {
  liftoff::EmitCommutativeBinOp<&Assembler::xorl, &Assembler::movl>(this, dst,
                                                                    lhs, rhs);
}
1210
// i32 bitwise xor with immediate.
void LiftoffAssembler::emit_i32_xori(Register dst, Register lhs, int32_t imm) {
  liftoff::EmitCommutativeBinOpImm<&Assembler::xorl, &Assembler::movl>(
      this, dst, lhs, imm);
}
1215
1216 namespace liftoff {
// Emits a variable-amount shift. x64 shifts take their amount in cl, so this
// shuffles values such that {amount} ends up in rcx, preserving the previous
// contents of rcx where necessary.
template <ValueKind kind>
inline void EmitShiftOperation(LiftoffAssembler* assm, Register dst,
                               Register src, Register amount,
                               void (Assembler::*emit_shift)(Register)) {
  // If dst is rcx, compute into the scratch register first, then move to rcx.
  if (dst == rcx) {
    assm->Move(kScratchRegister, src, kind);
    if (amount != rcx) assm->Move(rcx, amount, kind);
    (assm->*emit_shift)(kScratchRegister);
    assm->Move(rcx, kScratchRegister, kind);
    return;
  }

  // Move amount into rcx. If rcx is in use, move its content into the scratch
  // register. If src is rcx, src is now the scratch register.
  bool use_scratch = false;
  if (amount != rcx) {
    use_scratch =
        src == rcx || assm->cache_state()->is_used(LiftoffRegister(rcx));
    if (use_scratch) assm->movq(kScratchRegister, rcx);
    if (src == rcx) src = kScratchRegister;
    assm->Move(rcx, amount, kind);
  }

  // Do the actual shift.
  if (dst != src) assm->Move(dst, src, kind);
  (assm->*emit_shift)(dst);

  // Restore rcx if needed.
  if (use_scratch) assm->movq(rcx, kScratchRegister);
}
1248 } // namespace liftoff
1249
// i32 shift-left by a register amount.
void LiftoffAssembler::emit_i32_shl(Register dst, Register src,
                                    Register amount) {
  liftoff::EmitShiftOperation<kI32>(this, dst, src, amount,
                                    &Assembler::shll_cl);
}
1255
// i32 shift-left by a constant; the amount is masked to the lower 5 bits.
void LiftoffAssembler::emit_i32_shli(Register dst, Register src,
                                     int32_t amount) {
  if (dst != src) movl(dst, src);
  shll(dst, Immediate(amount & 31));
}
1261
// i32 arithmetic shift-right by a register amount.
void LiftoffAssembler::emit_i32_sar(Register dst, Register src,
                                    Register amount) {
  liftoff::EmitShiftOperation<kI32>(this, dst, src, amount,
                                    &Assembler::sarl_cl);
}
1267
// i32 arithmetic shift-right by a constant (amount masked to 5 bits).
void LiftoffAssembler::emit_i32_sari(Register dst, Register src,
                                     int32_t amount) {
  if (dst != src) movl(dst, src);
  sarl(dst, Immediate(amount & 31));
}
1273
// i32 logical shift-right by a register amount.
void LiftoffAssembler::emit_i32_shr(Register dst, Register src,
                                    Register amount) {
  liftoff::EmitShiftOperation<kI32>(this, dst, src, amount,
                                    &Assembler::shrl_cl);
}
1279
// i32 logical shift-right by a constant (amount masked to 5 bits).
void LiftoffAssembler::emit_i32_shri(Register dst, Register src,
                                     int32_t amount) {
  if (dst != src) movl(dst, src);
  shrl(dst, Immediate(amount & 31));
}
1285
// i32 count-leading-zeros.
void LiftoffAssembler::emit_i32_clz(Register dst, Register src) {
  Lzcntl(dst, src);
}
1289
// i32 count-trailing-zeros.
void LiftoffAssembler::emit_i32_ctz(Register dst, Register src) {
  Tzcntl(dst, src);
}
1293
// i32 population count; returns false (caller must use a fallback) when the
// POPCNT feature is unavailable.
bool LiftoffAssembler::emit_i32_popcnt(Register dst, Register src) {
  if (!CpuFeatures::IsSupported(POPCNT)) return false;
  CpuFeatureScope scope(this, POPCNT);
  popcntl(dst, src);
  return true;
}
1300
emit_i64_add(LiftoffRegister dst,LiftoffRegister lhs,LiftoffRegister rhs)1301 void LiftoffAssembler::emit_i64_add(LiftoffRegister dst, LiftoffRegister lhs,
1302 LiftoffRegister rhs) {
1303 if (lhs.gp() != dst.gp()) {
1304 leaq(dst.gp(), Operand(lhs.gp(), rhs.gp(), times_1, 0));
1305 } else {
1306 addq(dst.gp(), rhs.gp());
1307 }
1308 }
1309
// 64-bit add-immediate: dst = lhs + imm.
void LiftoffAssembler::emit_i64_addi(LiftoffRegister dst, LiftoffRegister lhs,
                                     int64_t imm) {
  if (!is_int32(imm)) {
    // The immediate does not fit into an x64 instruction; materialize it in
    // the scratch register first.
    TurboAssembler::Move(kScratchRegister, imm);
    if (lhs.gp() == dst.gp()) {
      addq(dst.gp(), kScratchRegister);
    } else {
      leaq(dst.gp(), Operand(lhs.gp(), kScratchRegister, times_1, 0));
    }
  } else if (lhs.gp() == dst.gp()) {
    addq(dst.gp(), Immediate(static_cast<int32_t>(imm)));
  } else {
    leaq(dst.gp(), Operand(lhs.gp(), static_cast<int32_t>(imm)));
  }
}
1325
emit_i64_sub(LiftoffRegister dst,LiftoffRegister lhs,LiftoffRegister rhs)1326 void LiftoffAssembler::emit_i64_sub(LiftoffRegister dst, LiftoffRegister lhs,
1327 LiftoffRegister rhs) {
1328 if (lhs.gp() == rhs.gp()) {
1329 xorq(dst.gp(), dst.gp());
1330 } else if (dst.gp() == rhs.gp()) {
1331 negq(dst.gp());
1332 addq(dst.gp(), lhs.gp());
1333 } else {
1334 if (dst.gp() != lhs.gp()) movq(dst.gp(), lhs.gp());
1335 subq(dst.gp(), rhs.gp());
1336 }
1337 }
1338
// 64-bit multiplication (commutative, so operand order is free).
void LiftoffAssembler::emit_i64_mul(LiftoffRegister dst, LiftoffRegister lhs,
                                    LiftoffRegister rhs) {
  liftoff::EmitCommutativeBinOp<&Assembler::imulq, &Assembler::movq>(
      this, dst.gp(), lhs.gp(), rhs.gp());
}
1344
// i64 signed division; traps on zero divisor and on kMinInt64 / -1. Always
// supported on x64 (returns true).
bool LiftoffAssembler::emit_i64_divs(LiftoffRegister dst, LiftoffRegister lhs,
                                     LiftoffRegister rhs,
                                     Label* trap_div_by_zero,
                                     Label* trap_div_unrepresentable) {
  liftoff::EmitIntDivOrRem<int64_t, liftoff::DivOrRem::kDiv>(
      this, dst.gp(), lhs.gp(), rhs.gp(), trap_div_by_zero,
      trap_div_unrepresentable);
  return true;
}
1354
// i64 unsigned division; traps on zero divisor. Always supported on x64.
bool LiftoffAssembler::emit_i64_divu(LiftoffRegister dst, LiftoffRegister lhs,
                                     LiftoffRegister rhs,
                                     Label* trap_div_by_zero) {
  liftoff::EmitIntDivOrRem<uint64_t, liftoff::DivOrRem::kDiv>(
      this, dst.gp(), lhs.gp(), rhs.gp(), trap_div_by_zero, nullptr);
  return true;
}
1362
// i64 signed remainder; traps on zero divisor. Always supported on x64.
bool LiftoffAssembler::emit_i64_rems(LiftoffRegister dst, LiftoffRegister lhs,
                                     LiftoffRegister rhs,
                                     Label* trap_div_by_zero) {
  liftoff::EmitIntDivOrRem<int64_t, liftoff::DivOrRem::kRem>(
      this, dst.gp(), lhs.gp(), rhs.gp(), trap_div_by_zero, nullptr);
  return true;
}
1370
// i64 unsigned remainder; traps on zero divisor. Always supported on x64.
bool LiftoffAssembler::emit_i64_remu(LiftoffRegister dst, LiftoffRegister lhs,
                                     LiftoffRegister rhs,
                                     Label* trap_div_by_zero) {
  liftoff::EmitIntDivOrRem<uint64_t, liftoff::DivOrRem::kRem>(
      this, dst.gp(), lhs.gp(), rhs.gp(), trap_div_by_zero, nullptr);
  return true;
}
1378
// i64 bitwise and.
void LiftoffAssembler::emit_i64_and(LiftoffRegister dst, LiftoffRegister lhs,
                                    LiftoffRegister rhs) {
  liftoff::EmitCommutativeBinOp<&Assembler::andq, &Assembler::movq>(
      this, dst.gp(), lhs.gp(), rhs.gp());
}
1384
// i64 bitwise and with a 32-bit immediate (sign-extended by andq).
void LiftoffAssembler::emit_i64_andi(LiftoffRegister dst, LiftoffRegister lhs,
                                     int32_t imm) {
  liftoff::EmitCommutativeBinOpImm<&Assembler::andq, &Assembler::movq>(
      this, dst.gp(), lhs.gp(), imm);
}
1390
// i64 bitwise or.
void LiftoffAssembler::emit_i64_or(LiftoffRegister dst, LiftoffRegister lhs,
                                   LiftoffRegister rhs) {
  liftoff::EmitCommutativeBinOp<&Assembler::orq, &Assembler::movq>(
      this, dst.gp(), lhs.gp(), rhs.gp());
}
1396
// i64 bitwise or with a 32-bit immediate.
void LiftoffAssembler::emit_i64_ori(LiftoffRegister dst, LiftoffRegister lhs,
                                    int32_t imm) {
  liftoff::EmitCommutativeBinOpImm<&Assembler::orq, &Assembler::movq>(
      this, dst.gp(), lhs.gp(), imm);
}
1402
// i64 bitwise xor.
void LiftoffAssembler::emit_i64_xor(LiftoffRegister dst, LiftoffRegister lhs,
                                    LiftoffRegister rhs) {
  liftoff::EmitCommutativeBinOp<&Assembler::xorq, &Assembler::movq>(
      this, dst.gp(), lhs.gp(), rhs.gp());
}
1408
// i64 bitwise xor with a 32-bit immediate.
void LiftoffAssembler::emit_i64_xori(LiftoffRegister dst, LiftoffRegister lhs,
                                     int32_t imm) {
  liftoff::EmitCommutativeBinOpImm<&Assembler::xorq, &Assembler::movq>(
      this, dst.gp(), lhs.gp(), imm);
}
1414
// i64 shift-left by a register amount.
void LiftoffAssembler::emit_i64_shl(LiftoffRegister dst, LiftoffRegister src,
                                    Register amount) {
  liftoff::EmitShiftOperation<kI64>(this, dst.gp(), src.gp(), amount,
                                    &Assembler::shlq_cl);
}
1420
// i64 shift-left by a constant; the amount is masked to the lower 6 bits.
void LiftoffAssembler::emit_i64_shli(LiftoffRegister dst, LiftoffRegister src,
                                     int32_t amount) {
  if (dst.gp() != src.gp()) movq(dst.gp(), src.gp());
  shlq(dst.gp(), Immediate(amount & 63));
}
1426
// i64 arithmetic shift-right by a register amount.
void LiftoffAssembler::emit_i64_sar(LiftoffRegister dst, LiftoffRegister src,
                                    Register amount) {
  liftoff::EmitShiftOperation<kI64>(this, dst.gp(), src.gp(), amount,
                                    &Assembler::sarq_cl);
}
1432
// i64 arithmetic shift-right by a constant (amount masked to 6 bits).
void LiftoffAssembler::emit_i64_sari(LiftoffRegister dst, LiftoffRegister src,
                                     int32_t amount) {
  if (dst.gp() != src.gp()) movq(dst.gp(), src.gp());
  sarq(dst.gp(), Immediate(amount & 63));
}
1438
// i64 logical shift-right by a register amount.
void LiftoffAssembler::emit_i64_shr(LiftoffRegister dst, LiftoffRegister src,
                                    Register amount) {
  liftoff::EmitShiftOperation<kI64>(this, dst.gp(), src.gp(), amount,
                                    &Assembler::shrq_cl);
}
1444
emit_i64_shri(LiftoffRegister dst,LiftoffRegister src,int32_t amount)1445 void LiftoffAssembler::emit_i64_shri(LiftoffRegister dst, LiftoffRegister src,
1446 int32_t amount) {
1447 if (dst != src) movq(dst.gp(), src.gp());
1448 shrq(dst.gp(), Immediate(amount & 63));
1449 }
1450
// i64 count-leading-zeros.
void LiftoffAssembler::emit_i64_clz(LiftoffRegister dst, LiftoffRegister src) {
  Lzcntq(dst.gp(), src.gp());
}
1454
// i64 count-trailing-zeros.
void LiftoffAssembler::emit_i64_ctz(LiftoffRegister dst, LiftoffRegister src) {
  Tzcntq(dst.gp(), src.gp());
}
1458
// i64 population count; returns false (caller must use a fallback) when the
// POPCNT feature is unavailable.
bool LiftoffAssembler::emit_i64_popcnt(LiftoffRegister dst,
                                       LiftoffRegister src) {
  if (!CpuFeatures::IsSupported(POPCNT)) return false;
  CpuFeatureScope scope(this, POPCNT);
  popcntq(dst.gp(), src.gp());
  return true;
}
1466
// Zero-extends a 32-bit value to pointer width; a 32-bit mov on x64 clears
// the upper 32 bits of the destination.
void LiftoffAssembler::emit_u32_to_intptr(Register dst, Register src) {
  movl(dst, src);
}
1470
// f32 addition. With AVX the three-operand form needs no extra moves;
// otherwise commutativity is used when {dst} aliases {rhs}.
void LiftoffAssembler::emit_f32_add(DoubleRegister dst, DoubleRegister lhs,
                                    DoubleRegister rhs) {
  if (CpuFeatures::IsSupported(AVX)) {
    CpuFeatureScope scope(this, AVX);
    vaddss(dst, lhs, rhs);
  } else if (dst == rhs) {
    addss(dst, lhs);
  } else {
    if (dst != lhs) movss(dst, lhs);
    addss(dst, rhs);
  }
}
1483
// f32 subtraction (non-commutative): when {dst} aliases {rhs}, {rhs} is saved
// in the fp scratch register before being overwritten by {lhs}.
void LiftoffAssembler::emit_f32_sub(DoubleRegister dst, DoubleRegister lhs,
                                    DoubleRegister rhs) {
  if (CpuFeatures::IsSupported(AVX)) {
    CpuFeatureScope scope(this, AVX);
    vsubss(dst, lhs, rhs);
  } else if (dst == rhs) {
    movss(kScratchDoubleReg, rhs);
    movss(dst, lhs);
    subss(dst, kScratchDoubleReg);
  } else {
    if (dst != lhs) movss(dst, lhs);
    subss(dst, rhs);
  }
}
1498
// f32 multiplication; commutative, so {dst == rhs} needs no scratch register.
void LiftoffAssembler::emit_f32_mul(DoubleRegister dst, DoubleRegister lhs,
                                    DoubleRegister rhs) {
  if (CpuFeatures::IsSupported(AVX)) {
    CpuFeatureScope scope(this, AVX);
    vmulss(dst, lhs, rhs);
  } else if (dst == rhs) {
    mulss(dst, lhs);
  } else {
    if (dst != lhs) movss(dst, lhs);
    mulss(dst, rhs);
  }
}
1511
// f32 division (non-commutative): when {dst} aliases {rhs}, {rhs} is saved in
// the fp scratch register before being overwritten by {lhs}.
void LiftoffAssembler::emit_f32_div(DoubleRegister dst, DoubleRegister lhs,
                                    DoubleRegister rhs) {
  if (CpuFeatures::IsSupported(AVX)) {
    CpuFeatureScope scope(this, AVX);
    vdivss(dst, lhs, rhs);
  } else if (dst == rhs) {
    movss(kScratchDoubleReg, rhs);
    movss(dst, lhs);
    divss(dst, kScratchDoubleReg);
  } else {
    if (dst != lhs) movss(dst, lhs);
    divss(dst, rhs);
  }
}
1526
1527 namespace liftoff {
// Whether {EmitFloatMinOrMax} computes the minimum or the maximum.
enum class MinOrMax : uint8_t { kMin, kMax };
// Emits wasm float min/max semantics: the result is NaN if either input is
// NaN, and -0.0 is treated as smaller than +0.0. {type} (float/double)
// selects the instruction width.
template <typename type>
inline void EmitFloatMinOrMax(LiftoffAssembler* assm, DoubleRegister dst,
                              DoubleRegister lhs, DoubleRegister rhs,
                              MinOrMax min_or_max) {
  Label is_nan;
  Label lhs_below_rhs;
  Label lhs_above_rhs;
  Label done;

// Dispatch to the single ("s") or double ("d") precision instruction variant
// depending on the width of {type}.
#define dop(name, ...)            \
  do {                            \
    if (sizeof(type) == 4) {      \
      assm->name##s(__VA_ARGS__); \
    } else {                      \
      assm->name##d(__VA_ARGS__); \
    }                             \
  } while (false)

  // Check the easy cases first: nan (e.g. unordered), smaller and greater.
  // NaN has to be checked first, because PF=1 implies CF=1.
  dop(Ucomis, lhs, rhs);
  assm->j(parity_even, &is_nan, Label::kNear);   // PF=1
  assm->j(below, &lhs_below_rhs, Label::kNear);  // CF=1
  assm->j(above, &lhs_above_rhs, Label::kNear);  // CF=0 && ZF=0

  // If we get here, then either
  // a) {lhs == rhs},
  // b) {lhs == -0.0} and {rhs == 0.0}, or
  // c) {lhs == 0.0} and {rhs == -0.0}.
  // For a), it does not matter whether we return {lhs} or {rhs}. Check the sign
  // bit of {rhs} to differentiate b) and c).
  dop(Movmskp, kScratchRegister, rhs);
  assm->testl(kScratchRegister, Immediate(1));
  assm->j(zero, &lhs_below_rhs, Label::kNear);
  assm->jmp(&lhs_above_rhs, Label::kNear);

  assm->bind(&is_nan);
  // Create a NaN output (0.0 / 0.0).
  dop(Xorp, dst, dst);
  dop(Divs, dst, dst);
  assm->jmp(&done, Label::kNear);

  assm->bind(&lhs_below_rhs);
  DoubleRegister lhs_below_rhs_src = min_or_max == MinOrMax::kMin ? lhs : rhs;
  if (dst != lhs_below_rhs_src) dop(Movs, dst, lhs_below_rhs_src);
  assm->jmp(&done, Label::kNear);

  assm->bind(&lhs_above_rhs);
  DoubleRegister lhs_above_rhs_src = min_or_max == MinOrMax::kMin ? rhs : lhs;
  if (dst != lhs_above_rhs_src) dop(Movs, dst, lhs_above_rhs_src);

  assm->bind(&done);
}
1582 } // namespace liftoff
1583
// f32 minimum with wasm semantics (NaN-propagating, -0.0 < +0.0).
void LiftoffAssembler::emit_f32_min(DoubleRegister dst, DoubleRegister lhs,
                                    DoubleRegister rhs) {
  liftoff::EmitFloatMinOrMax<float>(this, dst, lhs, rhs,
                                    liftoff::MinOrMax::kMin);
}
1589
// f32 maximum with wasm semantics (NaN-propagating, -0.0 < +0.0).
void LiftoffAssembler::emit_f32_max(DoubleRegister dst, DoubleRegister lhs,
                                    DoubleRegister rhs) {
  liftoff::EmitFloatMinOrMax<float>(this, dst, lhs, rhs,
                                    liftoff::MinOrMax::kMax);
}
1595
emit_f32_copysign(DoubleRegister dst,DoubleRegister lhs,DoubleRegister rhs)1596 void LiftoffAssembler::emit_f32_copysign(DoubleRegister dst, DoubleRegister lhs,
1597 DoubleRegister rhs) {
1598 static constexpr int kF32SignBit = 1 << 31;
1599 Movd(kScratchRegister, lhs);
1600 andl(kScratchRegister, Immediate(~kF32SignBit));
1601 Movd(liftoff::kScratchRegister2, rhs);
1602 andl(liftoff::kScratchRegister2, Immediate(kF32SignBit));
1603 orl(kScratchRegister, liftoff::kScratchRegister2);
1604 Movd(dst, kScratchRegister);
1605 }
1606
// f32 abs: clears the sign bit by anding with {kSignBit - 1}.
void LiftoffAssembler::emit_f32_abs(DoubleRegister dst, DoubleRegister src) {
  static constexpr uint32_t kSignBit = uint32_t{1} << 31;
  if (dst == src) {
    // The mask must live in a register different from {dst}.
    TurboAssembler::Move(kScratchDoubleReg, kSignBit - 1);
    Andps(dst, kScratchDoubleReg);
  } else {
    // Materialize the mask directly in {dst}, then and with {src}.
    TurboAssembler::Move(dst, kSignBit - 1);
    Andps(dst, src);
  }
}
1617
// f32 negate: flips the sign bit by xoring with {kSignBit}.
void LiftoffAssembler::emit_f32_neg(DoubleRegister dst, DoubleRegister src) {
  static constexpr uint32_t kSignBit = uint32_t{1} << 31;
  if (dst == src) {
    // The mask must live in a register different from {dst}.
    TurboAssembler::Move(kScratchDoubleReg, kSignBit);
    Xorps(dst, kScratchDoubleReg);
  } else {
    // Materialize the mask directly in {dst}, then xor with {src}.
    TurboAssembler::Move(dst, kSignBit);
    Xorps(dst, src);
  }
}
1628
// f32 ceil; returns false when SSE4.1 (roundss) is unavailable.
bool LiftoffAssembler::emit_f32_ceil(DoubleRegister dst, DoubleRegister src) {
  RETURN_FALSE_IF_MISSING_CPU_FEATURE(SSE4_1);
  Roundss(dst, src, kRoundUp);
  return true;
}
1634
// f32 floor; returns false when SSE4.1 (roundss) is unavailable.
bool LiftoffAssembler::emit_f32_floor(DoubleRegister dst, DoubleRegister src) {
  RETURN_FALSE_IF_MISSING_CPU_FEATURE(SSE4_1);
  Roundss(dst, src, kRoundDown);
  return true;
}
1640
// f32 truncate (round toward zero); returns false without SSE4.1.
bool LiftoffAssembler::emit_f32_trunc(DoubleRegister dst, DoubleRegister src) {
  RETURN_FALSE_IF_MISSING_CPU_FEATURE(SSE4_1);
  Roundss(dst, src, kRoundToZero);
  return true;
}
1646
// f32 round-to-nearest; returns false without SSE4.1.
bool LiftoffAssembler::emit_f32_nearest_int(DoubleRegister dst,
                                            DoubleRegister src) {
  RETURN_FALSE_IF_MISSING_CPU_FEATURE(SSE4_1);
  Roundss(dst, src, kRoundToNearest);
  return true;
}
1653
// f32 square root.
void LiftoffAssembler::emit_f32_sqrt(DoubleRegister dst, DoubleRegister src) {
  Sqrtss(dst, src);
}
1657
// f64 addition. With AVX the three-operand form needs no extra moves;
// otherwise commutativity is used when {dst} aliases {rhs}.
void LiftoffAssembler::emit_f64_add(DoubleRegister dst, DoubleRegister lhs,
                                    DoubleRegister rhs) {
  if (CpuFeatures::IsSupported(AVX)) {
    CpuFeatureScope scope(this, AVX);
    vaddsd(dst, lhs, rhs);
  } else if (dst == rhs) {
    addsd(dst, lhs);
  } else {
    if (dst != lhs) movsd(dst, lhs);
    addsd(dst, rhs);
  }
}
1670
// f64 subtraction (non-commutative): when {dst} aliases {rhs}, {rhs} is saved
// in the fp scratch register before being overwritten by {lhs}.
void LiftoffAssembler::emit_f64_sub(DoubleRegister dst, DoubleRegister lhs,
                                    DoubleRegister rhs) {
  if (CpuFeatures::IsSupported(AVX)) {
    CpuFeatureScope scope(this, AVX);
    vsubsd(dst, lhs, rhs);
  } else if (dst == rhs) {
    movsd(kScratchDoubleReg, rhs);
    movsd(dst, lhs);
    subsd(dst, kScratchDoubleReg);
  } else {
    if (dst != lhs) movsd(dst, lhs);
    subsd(dst, rhs);
  }
}
1685
// f64 multiplication; commutative, so {dst == rhs} needs no scratch register.
void LiftoffAssembler::emit_f64_mul(DoubleRegister dst, DoubleRegister lhs,
                                    DoubleRegister rhs) {
  if (CpuFeatures::IsSupported(AVX)) {
    CpuFeatureScope scope(this, AVX);
    vmulsd(dst, lhs, rhs);
  } else if (dst == rhs) {
    mulsd(dst, lhs);
  } else {
    if (dst != lhs) movsd(dst, lhs);
    mulsd(dst, rhs);
  }
}
1698
// f64 division (non-commutative): when {dst} aliases {rhs}, {rhs} is saved in
// the fp scratch register before being overwritten by {lhs}.
void LiftoffAssembler::emit_f64_div(DoubleRegister dst, DoubleRegister lhs,
                                    DoubleRegister rhs) {
  if (CpuFeatures::IsSupported(AVX)) {
    CpuFeatureScope scope(this, AVX);
    vdivsd(dst, lhs, rhs);
  } else if (dst == rhs) {
    movsd(kScratchDoubleReg, rhs);
    movsd(dst, lhs);
    divsd(dst, kScratchDoubleReg);
  } else {
    if (dst != lhs) movsd(dst, lhs);
    divsd(dst, rhs);
  }
}
1713
// f64 minimum with wasm semantics (NaN-propagating, -0.0 < +0.0).
void LiftoffAssembler::emit_f64_min(DoubleRegister dst, DoubleRegister lhs,
                                    DoubleRegister rhs) {
  liftoff::EmitFloatMinOrMax<double>(this, dst, lhs, rhs,
                                     liftoff::MinOrMax::kMin);
}
1719
// f64 copysign: combines the magnitude of {lhs} with the sign of {rhs} via
// integer bit operations on the gp scratch registers.
void LiftoffAssembler::emit_f64_copysign(DoubleRegister dst, DoubleRegister lhs,
                                         DoubleRegister rhs) {
  // Extract sign bit from {rhs} into {kScratchRegister2}.
  Movq(liftoff::kScratchRegister2, rhs);
  shrq(liftoff::kScratchRegister2, Immediate(63));
  shlq(liftoff::kScratchRegister2, Immediate(63));
  // Reset sign bit of {lhs} (in {kScratchRegister}).
  Movq(kScratchRegister, lhs);
  btrq(kScratchRegister, Immediate(63));
  // Combine both values into {kScratchRegister} and move into {dst}.
  orq(kScratchRegister, liftoff::kScratchRegister2);
  Movq(dst, kScratchRegister);
}
1733
// wasm f64.max. Same shared helper as emit_f64_min, with kMax selected.
void LiftoffAssembler::emit_f64_max(DoubleRegister dst, DoubleRegister lhs,
                                    DoubleRegister rhs) {
  liftoff::EmitFloatMinOrMax<double>(this, dst, lhs, rhs,
                                     liftoff::MinOrMax::kMax);
}
1739
emit_f64_abs(DoubleRegister dst,DoubleRegister src)1740 void LiftoffAssembler::emit_f64_abs(DoubleRegister dst, DoubleRegister src) {
1741 static constexpr uint64_t kSignBit = uint64_t{1} << 63;
1742 if (dst == src) {
1743 TurboAssembler::Move(kScratchDoubleReg, kSignBit - 1);
1744 Andpd(dst, kScratchDoubleReg);
1745 } else {
1746 TurboAssembler::Move(dst, kSignBit - 1);
1747 Andpd(dst, src);
1748 }
1749 }
1750
emit_f64_neg(DoubleRegister dst,DoubleRegister src)1751 void LiftoffAssembler::emit_f64_neg(DoubleRegister dst, DoubleRegister src) {
1752 static constexpr uint64_t kSignBit = uint64_t{1} << 63;
1753 if (dst == src) {
1754 TurboAssembler::Move(kScratchDoubleReg, kSignBit);
1755 Xorpd(dst, kScratchDoubleReg);
1756 } else {
1757 TurboAssembler::Move(dst, kSignBit);
1758 Xorpd(dst, src);
1759 }
1760 }
1761
// f64.ceil (round toward +infinity). All four rounding ops require the
// SSE4.1 roundsd instruction; they return {false} when it is unavailable so
// the caller can use a fallback.
bool LiftoffAssembler::emit_f64_ceil(DoubleRegister dst, DoubleRegister src) {
  RETURN_FALSE_IF_MISSING_CPU_FEATURE(SSE4_1);
  Roundsd(dst, src, kRoundUp);
  return true;
}

// f64.floor (round toward -infinity).
bool LiftoffAssembler::emit_f64_floor(DoubleRegister dst, DoubleRegister src) {
  RETURN_FALSE_IF_MISSING_CPU_FEATURE(SSE4_1);
  Roundsd(dst, src, kRoundDown);
  return true;
}

// f64.trunc (round toward zero).
bool LiftoffAssembler::emit_f64_trunc(DoubleRegister dst, DoubleRegister src) {
  RETURN_FALSE_IF_MISSING_CPU_FEATURE(SSE4_1);
  Roundsd(dst, src, kRoundToZero);
  return true;
}

// f64.nearest (round to nearest, ties to even).
bool LiftoffAssembler::emit_f64_nearest_int(DoubleRegister dst,
                                            DoubleRegister src) {
  RETURN_FALSE_IF_MISSING_CPU_FEATURE(SSE4_1);
  Roundsd(dst, src, kRoundToNearest);
  return true;
}
1786
// f64.sqrt, a single sqrtsd.
void LiftoffAssembler::emit_f64_sqrt(DoubleRegister dst, DoubleRegister src) {
  Sqrtsd(dst, src);
}
1790
1791 namespace liftoff {
1792 #define __ assm->
// Used for float to int conversions: truncates {src} to an integer in {dst},
// then converts that integer back to float in {converted_back}. If
// {converted_back} compares equal to {src} afterwards, {src} was in range
// and the conversion succeeded.
template <typename dst_type, typename src_type>
inline void ConvertFloatToIntAndBack(LiftoffAssembler* assm, Register dst,
                                     DoubleRegister src,
                                     DoubleRegister converted_back) {
  if (std::is_same<double, src_type>::value) {  // f64
    if (std::is_same<int32_t, dst_type>::value) {  // f64 -> i32
      __ Cvttsd2si(dst, src);
      __ Cvtlsi2sd(converted_back, dst);
    } else if (std::is_same<uint32_t, dst_type>::value) {  // f64 -> u32
      // There is no unsigned 32-bit conversion: truncate to signed 64 bit,
      // keep the low 32 bits (movl zero-extends), convert back as 64-bit int.
      __ Cvttsd2siq(dst, src);
      __ movl(dst, dst);
      __ Cvtqsi2sd(converted_back, dst);
    } else if (std::is_same<int64_t, dst_type>::value) {  // f64 -> i64
      __ Cvttsd2siq(dst, src);
      __ Cvtqsi2sd(converted_back, dst);
    } else {
      UNREACHABLE();
    }
  } else {  // f32
    if (std::is_same<int32_t, dst_type>::value) {  // f32 -> i32
      __ Cvttss2si(dst, src);
      __ Cvtlsi2ss(converted_back, dst);
    } else if (std::is_same<uint32_t, dst_type>::value) {  // f32 -> u32
      // Same trick as for f64 -> u32 above.
      __ Cvttss2siq(dst, src);
      __ movl(dst, dst);
      __ Cvtqsi2ss(converted_back, dst);
    } else if (std::is_same<int64_t, dst_type>::value) {  // f32 -> i64
      __ Cvttss2siq(dst, src);
      __ Cvtqsi2ss(converted_back, dst);
    } else {
      UNREACHABLE();
    }
  }
}
1829
// Trapping float -> int truncation: jumps to {trap} when {src} is NaN or out
// of range for {dst_type}. Bails out of Liftoff (still returning true) when
// SSE4.1 is missing.
template <typename dst_type, typename src_type>
inline bool EmitTruncateFloatToInt(LiftoffAssembler* assm, Register dst,
                                   DoubleRegister src, Label* trap) {
  if (!CpuFeatures::IsSupported(SSE4_1)) {
    __ bailout(kMissingCPUFeature, "no SSE4.1");
    return true;
  }
  CpuFeatureScope feature(assm, SSE4_1);

  DoubleRegister rounded = kScratchDoubleReg;
  DoubleRegister converted_back = kScratchDoubleReg2;

  // Truncate first so that the compare-back below only differs for NaN or
  // out-of-range inputs, not merely fractional ones.
  if (std::is_same<double, src_type>::value) {  // f64
    __ Roundsd(rounded, src, kRoundToZero);
  } else {  // f32
    __ Roundss(rounded, src, kRoundToZero);
  }
  ConvertFloatToIntAndBack<dst_type, src_type>(assm, dst, rounded,
                                               converted_back);
  if (std::is_same<double, src_type>::value) {  // f64
    __ Ucomisd(converted_back, rounded);
  } else {  // f32
    __ Ucomiss(converted_back, rounded);
  }

  // Jump to trap if PF is 1 (ucomis* sets PF on an unordered compare, i.e.
  // one of the operands was NaN) or if the values are not equal.
  __ j(parity_even, trap);
  __ j(not_equal, trap);
  return true;
}
1861
// Saturating float -> int truncation: NaN becomes 0, out-of-range values
// clamp to the min/max of {dst_type}. Bails out of Liftoff (still returning
// true) when SSE4.1 is missing.
template <typename dst_type, typename src_type>
inline bool EmitSatTruncateFloatToInt(LiftoffAssembler* assm, Register dst,
                                      DoubleRegister src) {
  if (!CpuFeatures::IsSupported(SSE4_1)) {
    __ bailout(kMissingCPUFeature, "no SSE4.1");
    return true;
  }
  CpuFeatureScope feature(assm, SSE4_1);

  Label done;
  Label not_nan;
  Label src_positive;

  DoubleRegister rounded = kScratchDoubleReg;
  DoubleRegister converted_back = kScratchDoubleReg2;
  // Note: {zero_reg} aliases {rounded}; that is safe because {rounded} is
  // dead once the ucomis* comparison below has executed.
  DoubleRegister zero_reg = kScratchDoubleReg;

  if (std::is_same<double, src_type>::value) {  // f64
    __ Roundsd(rounded, src, kRoundToZero);
  } else {  // f32
    __ Roundss(rounded, src, kRoundToZero);
  }

  ConvertFloatToIntAndBack<dst_type, src_type>(assm, dst, rounded,
                                               converted_back);
  if (std::is_same<double, src_type>::value) {  // f64
    __ Ucomisd(converted_back, rounded);
  } else {  // f32
    __ Ucomiss(converted_back, rounded);
  }

  // Return 0 if PF is 1 (ucomis* sets PF on an unordered compare, i.e. one
  // of the operands was NaN); parity_odd skips the zeroing otherwise.
  __ j(parity_odd, &not_nan);
  __ xorl(dst, dst);
  __ jmp(&done);

  __ bind(&not_nan);
  // If rounding is as expected, return result
  __ j(equal, &done);

  __ xorpd(zero_reg, zero_reg);

  // if out-of-bounds, check if src is positive
  if (std::is_same<double, src_type>::value) {  // f64
    __ Ucomisd(src, zero_reg);
  } else {  // f32
    __ Ucomiss(src, zero_reg);
  }
  __ j(above, &src_positive);
  // Negative out-of-range input: saturate to the type's minimum.
  if (std::is_same<int32_t, dst_type>::value ||
      std::is_same<uint32_t, dst_type>::value) {  // i32
    __ movl(
        dst,
        Immediate(static_cast<int32_t>(std::numeric_limits<dst_type>::min())));
  } else if (std::is_same<int64_t, dst_type>::value) {  // i64s
    __ movq(dst, Immediate64(std::numeric_limits<dst_type>::min()));
  } else {
    UNREACHABLE();
  }
  __ jmp(&done);

  __ bind(&src_positive);
  // Positive out-of-range input: saturate to the type's maximum.
  if (std::is_same<int32_t, dst_type>::value ||
      std::is_same<uint32_t, dst_type>::value) {  // i32
    __ movl(
        dst,
        Immediate(static_cast<int32_t>(std::numeric_limits<dst_type>::max())));
  } else if (std::is_same<int64_t, dst_type>::value) {  // i64s
    __ movq(dst, Immediate64(std::numeric_limits<dst_type>::max()));
  } else {
    UNREACHABLE();
  }

  __ bind(&done);
  return true;
}
1938
// Saturating float -> u64 truncation: NaN and negative inputs produce 0,
// values above the u64 range produce u64 max. Bails out of Liftoff (still
// returning true) when SSE4.1 is missing.
template <typename src_type>
inline bool EmitSatTruncateFloatToUInt64(LiftoffAssembler* assm, Register dst,
                                         DoubleRegister src) {
  if (!CpuFeatures::IsSupported(SSE4_1)) {
    __ bailout(kMissingCPUFeature, "no SSE4.1");
    return true;
  }
  CpuFeatureScope feature(assm, SSE4_1);

  Label done;
  Label neg_or_nan;
  Label overflow;

  DoubleRegister zero_reg = kScratchDoubleReg;

  __ xorpd(zero_reg, zero_reg);
  if (std::is_same<double, src_type>::value) {  // f64
    __ Ucomisd(src, zero_reg);
  } else {  // f32
    __ Ucomiss(src, zero_reg);
  }
  // Check if NaN (ucomis* sets PF on an unordered compare).
  __ j(parity_even, &neg_or_nan);
  __ j(below, &neg_or_nan);
  // In-range conversion; the helper jumps to {overflow} if the value cannot
  // be represented in an unsigned 64-bit integer.
  if (std::is_same<double, src_type>::value) {  // f64
    __ Cvttsd2uiq(dst, src, &overflow);
  } else {  // f32
    __ Cvttss2uiq(dst, src, &overflow);
  }
  __ jmp(&done);

  __ bind(&neg_or_nan);
  // Saturate to 0 by moving the zeroed xmm register into {dst}.
  __ movq(dst, zero_reg);
  __ jmp(&done);

  __ bind(&overflow);
  __ movq(dst, Immediate64(std::numeric_limits<uint64_t>::max()));
  __ bind(&done);
  return true;
}
1979 #undef __
1980 } // namespace liftoff
1981
// Emits code for one wasm numeric conversion opcode. Trapping conversions
// jump to {trap} on NaN/out-of-range input. Returns {false} only when a
// required CPU feature is missing (via RETURN_FALSE_IF_MISSING_CPU_FEATURE);
// the helper-based paths bail out of Liftoff themselves and return {true}.
bool LiftoffAssembler::emit_type_conversion(WasmOpcode opcode,
                                            LiftoffRegister dst,
                                            LiftoffRegister src, Label* trap) {
  switch (opcode) {
    case kExprI32ConvertI64:
      // Wrap i64 -> i32: movl keeps the low 32 bits.
      movl(dst.gp(), src.gp());
      return true;
    case kExprI32SConvertF32:
      return liftoff::EmitTruncateFloatToInt<int32_t, float>(this, dst.gp(),
                                                             src.fp(), trap);
    case kExprI32UConvertF32:
      return liftoff::EmitTruncateFloatToInt<uint32_t, float>(this, dst.gp(),
                                                              src.fp(), trap);
    case kExprI32SConvertF64:
      return liftoff::EmitTruncateFloatToInt<int32_t, double>(this, dst.gp(),
                                                              src.fp(), trap);
    case kExprI32UConvertF64:
      return liftoff::EmitTruncateFloatToInt<uint32_t, double>(this, dst.gp(),
                                                               src.fp(), trap);
    case kExprI32SConvertSatF32:
      return liftoff::EmitSatTruncateFloatToInt<int32_t, float>(this, dst.gp(),
                                                                src.fp());
    case kExprI32UConvertSatF32:
      return liftoff::EmitSatTruncateFloatToInt<uint32_t, float>(this, dst.gp(),
                                                                 src.fp());
    case kExprI32SConvertSatF64:
      return liftoff::EmitSatTruncateFloatToInt<int32_t, double>(this, dst.gp(),
                                                                 src.fp());
    case kExprI32UConvertSatF64:
      return liftoff::EmitSatTruncateFloatToInt<uint32_t, double>(
          this, dst.gp(), src.fp());
    case kExprI32ReinterpretF32:
      Movd(dst.gp(), src.fp());
      return true;
    case kExprI64SConvertI32:
      movsxlq(dst.gp(), src.gp());
      return true;
    case kExprI64SConvertF32:
      return liftoff::EmitTruncateFloatToInt<int64_t, float>(this, dst.gp(),
                                                             src.fp(), trap);
    case kExprI64UConvertF32: {
      RETURN_FALSE_IF_MISSING_CPU_FEATURE(SSE4_1);
      Cvttss2uiq(dst.gp(), src.fp(), trap);
      return true;
    }
    case kExprI64SConvertF64:
      return liftoff::EmitTruncateFloatToInt<int64_t, double>(this, dst.gp(),
                                                              src.fp(), trap);
    case kExprI64UConvertF64: {
      RETURN_FALSE_IF_MISSING_CPU_FEATURE(SSE4_1);
      Cvttsd2uiq(dst.gp(), src.fp(), trap);
      return true;
    }
    case kExprI64SConvertSatF32:
      return liftoff::EmitSatTruncateFloatToInt<int64_t, float>(this, dst.gp(),
                                                                src.fp());
    case kExprI64UConvertSatF32: {
      return liftoff::EmitSatTruncateFloatToUInt64<float>(this, dst.gp(),
                                                          src.fp());
    }
    case kExprI64SConvertSatF64:
      return liftoff::EmitSatTruncateFloatToInt<int64_t, double>(this, dst.gp(),
                                                                 src.fp());
    case kExprI64UConvertSatF64: {
      return liftoff::EmitSatTruncateFloatToUInt64<double>(this, dst.gp(),
                                                           src.fp());
    }
    case kExprI64UConvertI32:
      // The value is expected to be zero-extended already; copy only if the
      // registers differ.
      AssertZeroExtended(src.gp());
      if (dst.gp() != src.gp()) movl(dst.gp(), src.gp());
      return true;
    case kExprI64ReinterpretF64:
      Movq(dst.gp(), src.fp());
      return true;
    case kExprF32SConvertI32:
      Cvtlsi2ss(dst.fp(), src.gp());
      return true;
    case kExprF32UConvertI32:
      // Zero-extend u32 to 64 bits (movl clears the upper half), then use
      // the signed 64-bit -> f32 conversion, which is correct for all u32.
      movl(kScratchRegister, src.gp());
      Cvtqsi2ss(dst.fp(), kScratchRegister);
      return true;
    case kExprF32SConvertI64:
      Cvtqsi2ss(dst.fp(), src.gp());
      return true;
    case kExprF32UConvertI64:
      Cvtqui2ss(dst.fp(), src.gp());
      return true;
    case kExprF32ConvertF64:
      Cvtsd2ss(dst.fp(), src.fp());
      return true;
    case kExprF32ReinterpretI32:
      Movd(dst.fp(), src.gp());
      return true;
    case kExprF64SConvertI32:
      Cvtlsi2sd(dst.fp(), src.gp());
      return true;
    case kExprF64UConvertI32:
      // Same zero-extend trick as kExprF32UConvertI32 above.
      movl(kScratchRegister, src.gp());
      Cvtqsi2sd(dst.fp(), kScratchRegister);
      return true;
    case kExprF64SConvertI64:
      Cvtqsi2sd(dst.fp(), src.gp());
      return true;
    case kExprF64UConvertI64:
      Cvtqui2sd(dst.fp(), src.gp());
      return true;
    case kExprF64ConvertF32:
      Cvtss2sd(dst.fp(), src.fp());
      return true;
    case kExprF64ReinterpretI64:
      Movq(dst.fp(), src.gp());
      return true;
    default:
      UNREACHABLE();
  }
}
2098
// i32.extend8_s: sign-extend the low byte of {src} to 32 bits.
void LiftoffAssembler::emit_i32_signextend_i8(Register dst, Register src) {
  movsxbl(dst, src);
}

// i32.extend16_s: sign-extend the low 16 bits of {src} to 32 bits.
void LiftoffAssembler::emit_i32_signextend_i16(Register dst, Register src) {
  movsxwl(dst, src);
}

// i64.extend8_s: sign-extend the low byte of {src} to 64 bits.
void LiftoffAssembler::emit_i64_signextend_i8(LiftoffRegister dst,
                                              LiftoffRegister src) {
  movsxbq(dst.gp(), src.gp());
}

// i64.extend16_s: sign-extend the low 16 bits of {src} to 64 bits.
void LiftoffAssembler::emit_i64_signextend_i16(LiftoffRegister dst,
                                               LiftoffRegister src) {
  movsxwq(dst.gp(), src.gp());
}

// i64.extend32_s: sign-extend the low 32 bits of {src} to 64 bits.
void LiftoffAssembler::emit_i64_signextend_i32(LiftoffRegister dst,
                                               LiftoffRegister src) {
  movsxlq(dst.gp(), src.gp());
}
2121
// Unconditional jump to {label}.
void LiftoffAssembler::emit_jump(Label* label) { jmp(label); }

// Unconditional indirect jump to the address held in {target}.
void LiftoffAssembler::emit_jump(Register target) { jmp(target); }
2125
// Compares {lhs} against {rhs} (or against zero when {rhs} is no_reg, which
// is only valid for kI32) and jumps to {label} if {liftoff_cond} holds.
// Comparison width follows {kind}; reference kinds use a full 64-bit compare
// and only support (in)equality.
void LiftoffAssembler::emit_cond_jump(LiftoffCondition liftoff_cond,
                                      Label* label, ValueKind kind,
                                      Register lhs, Register rhs) {
  Condition cond = liftoff::ToCondition(liftoff_cond);
  if (rhs != no_reg) {
    switch (kind) {
      case kI32:
        cmpl(lhs, rhs);
        break;
      case kRef:
      case kOptRef:
      case kRtt:
      case kRttWithDepth:
        // References only allow equality comparisons; they then share the
        // 64-bit compare with kI64.
        DCHECK(liftoff_cond == kEqual || liftoff_cond == kUnequal);
        V8_FALLTHROUGH;
      case kI64:
        cmpq(lhs, rhs);
        break;
      default:
        UNREACHABLE();
    }
  } else {
    // Compare against zero: test the register against itself.
    DCHECK_EQ(kind, kI32);
    testl(lhs, lhs);
  }

  j(cond, label);
}
2154
// Compares the 32-bit register {lhs} against the immediate {imm} and jumps
// to {label} if {liftoff_cond} holds.
void LiftoffAssembler::emit_i32_cond_jumpi(LiftoffCondition liftoff_cond,
                                           Label* label, Register lhs,
                                           int imm) {
  Condition cond = liftoff::ToCondition(liftoff_cond);
  cmpl(lhs, Immediate(imm));
  j(cond, label);
}
2162
// i32.eqz: {dst} = ({src} == 0) ? 1 : 0. setcc writes only the low byte, so
// movzxbl clears the remaining bits.
void LiftoffAssembler::emit_i32_eqz(Register dst, Register src) {
  testl(src, src);
  setcc(equal, dst);
  movzxbl(dst, dst);
}

// Materializes the boolean result of a 32-bit comparison into {dst}.
void LiftoffAssembler::emit_i32_set_cond(LiftoffCondition liftoff_cond,
                                         Register dst, Register lhs,
                                         Register rhs) {
  Condition cond = liftoff::ToCondition(liftoff_cond);
  cmpl(lhs, rhs);
  setcc(cond, dst);
  movzxbl(dst, dst);
}

// i64.eqz: {dst} = ({src} == 0) ? 1 : 0, using a full 64-bit test.
void LiftoffAssembler::emit_i64_eqz(Register dst, LiftoffRegister src) {
  testq(src.gp(), src.gp());
  setcc(equal, dst);
  movzxbl(dst, dst);
}

// Materializes the boolean result of a 64-bit comparison into {dst}.
void LiftoffAssembler::emit_i64_set_cond(LiftoffCondition liftoff_cond,
                                         Register dst, LiftoffRegister lhs,
                                         LiftoffRegister rhs) {
  Condition cond = liftoff::ToCondition(liftoff_cond);
  cmpq(lhs.gp(), rhs.gp());
  setcc(cond, dst);
  movzxbl(dst, dst);
}
2192
2193 namespace liftoff {
// Materializes the boolean result of a float comparison ({cmp_op} is Ucomiss
// or Ucomisd) into {dst}. NaN operands need special handling: the not_equal
// condition yields 1 and every other condition yields 0.
template <void (SharedTurboAssembler::*cmp_op)(DoubleRegister, DoubleRegister)>
void EmitFloatSetCond(LiftoffAssembler* assm, Condition cond, Register dst,
                      DoubleRegister lhs, DoubleRegister rhs) {
  Label cont;
  Label not_nan;

  (assm->*cmp_op)(lhs, rhs);
  // If PF is one, one of the operands was NaN. This needs special handling.
  assm->j(parity_odd, &not_nan, Label::kNear);
  // Return 1 for f32.ne, 0 for all other cases.
  if (cond == not_equal) {
    assm->movl(dst, Immediate(1));
  } else {
    assm->xorl(dst, dst);
  }
  assm->jmp(&cont, Label::kNear);
  assm->bind(&not_nan);

  // Ordinary (ordered) case: setcc writes the low byte, movzxbl clears the
  // rest of the register.
  assm->setcc(cond, dst);
  assm->movzxbl(dst, dst);
  assm->bind(&cont);
}
2216 } // namespace liftoff
2217
// f32 comparison result as 0/1 in {dst}; NaN handling in EmitFloatSetCond.
void LiftoffAssembler::emit_f32_set_cond(LiftoffCondition liftoff_cond,
                                         Register dst, DoubleRegister lhs,
                                         DoubleRegister rhs) {
  Condition cond = liftoff::ToCondition(liftoff_cond);
  liftoff::EmitFloatSetCond<&TurboAssembler::Ucomiss>(this, cond, dst, lhs,
                                                      rhs);
}

// f64 comparison result as 0/1 in {dst}; NaN handling in EmitFloatSetCond.
void LiftoffAssembler::emit_f64_set_cond(LiftoffCondition liftoff_cond,
                                         Register dst, DoubleRegister lhs,
                                         DoubleRegister rhs) {
  Condition cond = liftoff::ToCondition(liftoff_cond);
  liftoff::EmitFloatSetCond<&TurboAssembler::Ucomisd>(this, cond, dst, lhs,
                                                      rhs);
}
2233
emit_select(LiftoffRegister dst,Register condition,LiftoffRegister true_value,LiftoffRegister false_value,ValueKind kind)2234 bool LiftoffAssembler::emit_select(LiftoffRegister dst, Register condition,
2235 LiftoffRegister true_value,
2236 LiftoffRegister false_value,
2237 ValueKind kind) {
2238 if (kind != kI32 && kind != kI64) return false;
2239
2240 testl(condition, condition);
2241
2242 if (kind == kI32) {
2243 if (dst == false_value) {
2244 cmovl(not_zero, dst.gp(), true_value.gp());
2245 } else {
2246 if (dst != true_value) movl(dst.gp(), true_value.gp());
2247 cmovl(zero, dst.gp(), false_value.gp());
2248 }
2249 } else {
2250 if (dst == false_value) {
2251 cmovq(not_zero, dst.gp(), true_value.gp());
2252 } else {
2253 if (dst != true_value) movq(dst.gp(), true_value.gp());
2254 cmovq(zero, dst.gp(), false_value.gp());
2255 }
2256 }
2257
2258 return true;
2259 }
2260
emit_smi_check(Register obj,Label * target,SmiCheckMode mode)2261 void LiftoffAssembler::emit_smi_check(Register obj, Label* target,
2262 SmiCheckMode mode) {
2263 testb(obj, Immediate(kSmiTagMask));
2264 Condition condition = mode == kJumpOnSmi ? zero : not_zero;
2265 j(condition, target);
2266 }
2267
2268 // TODO(fanchenk): Distinguish mov* if data bypass delay matter.
2269 namespace liftoff {
// Emits dst = lhs <op> rhs for a commutative SIMD binop. With AVX the
// three-operand form handles any register aliasing; otherwise the
// destructive SSE form is used, exploiting commutativity when {dst} aliases
// {rhs}. {feature} optionally guards the SSE opcode (e.g. SSE4_1).
template <void (Assembler::*avx_op)(XMMRegister, XMMRegister, XMMRegister),
          void (Assembler::*sse_op)(XMMRegister, XMMRegister)>
void EmitSimdCommutativeBinOp(
    LiftoffAssembler* assm, LiftoffRegister dst, LiftoffRegister lhs,
    LiftoffRegister rhs, base::Optional<CpuFeature> feature = base::nullopt) {
  if (CpuFeatures::IsSupported(AVX)) {
    CpuFeatureScope scope(assm, AVX);
    (assm->*avx_op)(dst.fp(), lhs.fp(), rhs.fp());
    return;
  }

  base::Optional<CpuFeatureScope> sse_scope;
  if (feature.has_value()) sse_scope.emplace(assm, *feature);

  if (dst.fp() == rhs.fp()) {
    // Commutative, so simply fold {lhs} into {dst} (== {rhs}).
    (assm->*sse_op)(dst.fp(), lhs.fp());
  } else {
    if (dst.fp() != lhs.fp()) (assm->movaps)(dst.fp(), lhs.fp());
    (assm->*sse_op)(dst.fp(), rhs.fp());
  }
}
2291
// Emits dst = lhs <op> rhs for a non-commutative SIMD binop. Like the
// commutative variant, but when {dst} aliases {rhs} the right operand must
// be saved in the scratch register since operand order matters.
template <void (Assembler::*avx_op)(XMMRegister, XMMRegister, XMMRegister),
          void (Assembler::*sse_op)(XMMRegister, XMMRegister)>
void EmitSimdNonCommutativeBinOp(
    LiftoffAssembler* assm, LiftoffRegister dst, LiftoffRegister lhs,
    LiftoffRegister rhs, base::Optional<CpuFeature> feature = base::nullopt) {
  if (CpuFeatures::IsSupported(AVX)) {
    CpuFeatureScope scope(assm, AVX);
    (assm->*avx_op)(dst.fp(), lhs.fp(), rhs.fp());
    return;
  }

  base::Optional<CpuFeatureScope> sse_scope;
  if (feature.has_value()) sse_scope.emplace(assm, *feature);

  if (dst.fp() == rhs.fp()) {
    // Save {rhs} before it is clobbered by the copy of {lhs} into {dst}.
    assm->movaps(kScratchDoubleReg, rhs.fp());
    assm->movaps(dst.fp(), lhs.fp());
    (assm->*sse_op)(dst.fp(), kScratchDoubleReg);
  } else {
    if (dst.fp() != lhs.fp()) assm->movaps(dst.fp(), lhs.fp());
    (assm->*sse_op)(dst.fp(), rhs.fp());
  }
}
2315
// Emits a SIMD shift by a variable amount. The shift count in {count} is
// masked to the lane width ({width} bits, per wasm shift semantics) and
// transferred to an xmm register, as the SSE/AVX shift instructions take the
// count in an xmm operand.
template <void (Assembler::*avx_op)(XMMRegister, XMMRegister, XMMRegister),
          void (Assembler::*sse_op)(XMMRegister, XMMRegister), uint8_t width>
void EmitSimdShiftOp(LiftoffAssembler* assm, LiftoffRegister dst,
                     LiftoffRegister operand, LiftoffRegister count) {
  constexpr int mask = (1 << width) - 1;
  assm->movq(kScratchRegister, count.gp());
  assm->andq(kScratchRegister, Immediate(mask));
  assm->Movq(kScratchDoubleReg, kScratchRegister);
  if (CpuFeatures::IsSupported(AVX)) {
    CpuFeatureScope scope(assm, AVX);
    (assm->*avx_op)(dst.fp(), operand.fp(), kScratchDoubleReg);
  } else {
    if (dst.fp() != operand.fp()) assm->movaps(dst.fp(), operand.fp());
    (assm->*sse_op)(dst.fp(), kScratchDoubleReg);
  }
}
2332
// Emits a SIMD shift by a constant amount, masked to the lane width
// ({width} bits) and encoded as an immediate.
template <void (Assembler::*avx_op)(XMMRegister, XMMRegister, byte),
          void (Assembler::*sse_op)(XMMRegister, byte), uint8_t width>
void EmitSimdShiftOpImm(LiftoffAssembler* assm, LiftoffRegister dst,
                        LiftoffRegister operand, int32_t count) {
  constexpr int mask = (1 << width) - 1;
  byte shift = static_cast<byte>(count & mask);
  if (CpuFeatures::IsSupported(AVX)) {
    CpuFeatureScope scope(assm, AVX);
    (assm->*avx_op)(dst.fp(), operand.fp(), shift);
  } else {
    if (dst.fp() != operand.fp()) assm->movaps(dst.fp(), operand.fp());
    (assm->*sse_op)(dst.fp(), shift);
  }
}
2347
EmitAnyTrue(LiftoffAssembler * assm,LiftoffRegister dst,LiftoffRegister src)2348 inline void EmitAnyTrue(LiftoffAssembler* assm, LiftoffRegister dst,
2349 LiftoffRegister src) {
2350 assm->xorq(dst.gp(), dst.gp());
2351 assm->Ptest(src.fp(), src.fp());
2352 assm->setcc(not_equal, dst.gp());
2353 }
2354
// all_true: {dst} = 1 if no lane of {src} is zero. {pcmp} is the per-lane
// equality compare for the lane width; comparing against a zero vector marks
// exactly the zero lanes, so ptest's ZF (tmp all-zero) means all-true.
template <void (SharedTurboAssembler::*pcmp)(XMMRegister, XMMRegister)>
inline void EmitAllTrue(LiftoffAssembler* assm, LiftoffRegister dst,
                        LiftoffRegister src,
                        base::Optional<CpuFeature> feature = base::nullopt) {
  base::Optional<CpuFeatureScope> sse_scope;
  if (feature.has_value()) sse_scope.emplace(assm, *feature);

  XMMRegister tmp = kScratchDoubleReg;
  // Zero {dst} up front: setcc only writes the low byte.
  assm->xorq(dst.gp(), dst.gp());
  assm->Pxor(tmp, tmp);
  (assm->*pcmp)(tmp, src.fp());
  assm->Ptest(tmp, tmp);
  assm->setcc(equal, dst.gp());
}
2369
2370 } // namespace liftoff
2371
// Emits a wasm SIMD load-transform: load-and-extend (sign/zero extend 8
// narrow lanes to wide ones), load-zero (32/64-bit scalar into the low lane,
// upper lanes zeroed via Movss/Movsd), or load-splat. {protected_load_pc}
// records the code offset of the emitted load -- presumably consumed by the
// out-of-bounds trap handler; confirm against the caller.
void LiftoffAssembler::LoadTransform(LiftoffRegister dst, Register src_addr,
                                     Register offset_reg, uintptr_t offset_imm,
                                     LoadType type,
                                     LoadTransformationKind transform,
                                     uint32_t* protected_load_pc) {
  Operand src_op = liftoff::GetMemOp(this, src_addr, offset_reg, offset_imm);
  *protected_load_pc = pc_offset();
  MachineType memtype = type.mem_type();
  if (transform == LoadTransformationKind::kExtend) {
    if (memtype == MachineType::Int8()) {
      Pmovsxbw(dst.fp(), src_op);
    } else if (memtype == MachineType::Uint8()) {
      Pmovzxbw(dst.fp(), src_op);
    } else if (memtype == MachineType::Int16()) {
      Pmovsxwd(dst.fp(), src_op);
    } else if (memtype == MachineType::Uint16()) {
      Pmovzxwd(dst.fp(), src_op);
    } else if (memtype == MachineType::Int32()) {
      Pmovsxdq(dst.fp(), src_op);
    } else if (memtype == MachineType::Uint32()) {
      Pmovzxdq(dst.fp(), src_op);
    }
  } else if (transform == LoadTransformationKind::kZeroExtend) {
    if (memtype == MachineType::Int32()) {
      Movss(dst.fp(), src_op);
    } else {
      DCHECK_EQ(MachineType::Int64(), memtype);
      Movsd(dst.fp(), src_op);
    }
  } else {
    DCHECK_EQ(LoadTransformationKind::kSplat, transform);
    if (memtype == MachineType::Int8()) {
      S128Load8Splat(dst.fp(), src_op, kScratchDoubleReg);
    } else if (memtype == MachineType::Int16()) {
      S128Load16Splat(dst.fp(), src_op, kScratchDoubleReg);
    } else if (memtype == MachineType::Int32()) {
      S128Load32Splat(dst.fp(), src_op);
    } else if (memtype == MachineType::Int64()) {
      Movddup(dst.fp(), src_op);
    }
  }
}
2414
// Loads a single lane from memory into lane {laneidx} of {src}, writing the
// combined vector to {dst}, via the pinsr* family. {protected_load_pc} is
// forwarded so the helpers can record the faulting instruction offset.
void LiftoffAssembler::LoadLane(LiftoffRegister dst, LiftoffRegister src,
                                Register addr, Register offset_reg,
                                uintptr_t offset_imm, LoadType type,
                                uint8_t laneidx, uint32_t* protected_load_pc) {
  Operand src_op = liftoff::GetMemOp(this, addr, offset_reg, offset_imm);

  MachineType mem_type = type.mem_type();
  if (mem_type == MachineType::Int8()) {
    Pinsrb(dst.fp(), src.fp(), src_op, laneidx, protected_load_pc);
  } else if (mem_type == MachineType::Int16()) {
    Pinsrw(dst.fp(), src.fp(), src_op, laneidx, protected_load_pc);
  } else if (mem_type == MachineType::Int32()) {
    Pinsrd(dst.fp(), src.fp(), src_op, laneidx, protected_load_pc);
  } else {
    DCHECK_EQ(MachineType::Int64(), mem_type);
    Pinsrq(dst.fp(), src.fp(), src_op, laneidx, protected_load_pc);
  }
}
2433
// Stores lane {lane} of {src} to memory. 8/16-bit lanes use pextr*; 32/64-bit
// lanes go through the shared S128Store*Lane helpers. {protected_store_pc},
// when provided, receives the offset of the store for trap identification.
void LiftoffAssembler::StoreLane(Register dst, Register offset,
                                 uintptr_t offset_imm, LiftoffRegister src,
                                 StoreType type, uint8_t lane,
                                 uint32_t* protected_store_pc) {
  Operand dst_op = liftoff::GetMemOp(this, dst, offset, offset_imm);
  if (protected_store_pc) *protected_store_pc = pc_offset();
  MachineRepresentation rep = type.mem_rep();
  if (rep == MachineRepresentation::kWord8) {
    Pextrb(dst_op, src.fp(), lane);
  } else if (rep == MachineRepresentation::kWord16) {
    Pextrw(dst_op, src.fp(), lane);
  } else if (rep == MachineRepresentation::kWord32) {
    S128Store32Lane(dst_op, src.fp(), lane);
  } else {
    DCHECK_EQ(MachineRepresentation::kWord64, rep);
    S128Store64Lane(dst_op, src.fp(), lane);
  }
}
2452
// i8x16.shuffle: selects 16 bytes out of the concatenation of {lhs} and
// {rhs} according to the constant {shuffle} pattern.
void LiftoffAssembler::emit_i8x16_shuffle(LiftoffRegister dst,
                                          LiftoffRegister lhs,
                                          LiftoffRegister rhs,
                                          const uint8_t shuffle[16],
                                          bool is_swizzle) {
  if (is_swizzle) {
    uint32_t imms[4];
    // Shuffles that use just 1 operand are called swizzles, rhs can be ignored.
    wasm::SimdShuffle::Pack16Lanes(imms, shuffle);
    TurboAssembler::Move(kScratchDoubleReg, make_uint64(imms[3], imms[2]),
                         make_uint64(imms[1], imms[0]));
    Pshufb(dst.fp(), lhs.fp(), kScratchDoubleReg);
    return;
  }

  // Two-operand case: shuffle each input separately and OR the results.
  // pshufb zeroes any byte whose selector has the top bit set (0x80), so
  // {mask1} keeps only lanes taken from {lhs} (indices < 16) ...
  uint64_t mask1[2] = {};
  for (int i = 15; i >= 0; i--) {
    uint8_t lane = shuffle[i];
    int j = i >> 3;
    mask1[j] <<= 8;
    mask1[j] |= lane < kSimd128Size ? lane : 0x80;
  }
  TurboAssembler::Move(liftoff::kScratchDoubleReg2, mask1[1], mask1[0]);
  Pshufb(kScratchDoubleReg, lhs.fp(), liftoff::kScratchDoubleReg2);

  // ... and {mask2} keeps only lanes taken from {rhs} (indices >= 16,
  // rebased into 0..15 via the & 0x0F).
  uint64_t mask2[2] = {};
  for (int i = 15; i >= 0; i--) {
    uint8_t lane = shuffle[i];
    int j = i >> 3;
    mask2[j] <<= 8;
    mask2[j] |= lane >= kSimd128Size ? (lane & 0x0F) : 0x80;
  }
  TurboAssembler::Move(liftoff::kScratchDoubleReg2, mask2[1], mask2[0]);

  Pshufb(dst.fp(), rhs.fp(), liftoff::kScratchDoubleReg2);
  Por(dst.fp(), kScratchDoubleReg);
}
2490
// i8x16.swizzle: runtime byte selection of {lhs} by the indices in {rhs};
// delegated to the shared macro-assembler helper.
void LiftoffAssembler::emit_i8x16_swizzle(LiftoffRegister dst,
                                          LiftoffRegister lhs,
                                          LiftoffRegister rhs) {
  I8x16Swizzle(dst.fp(), lhs.fp(), rhs.fp(), kScratchDoubleReg,
               kScratchRegister);
}

// i8x16.popcnt: per-byte population count, delegated to the shared helper
// (needs both xmm scratch registers plus a GP scratch).
void LiftoffAssembler::emit_i8x16_popcnt(LiftoffRegister dst,
                                         LiftoffRegister src) {
  I8x16Popcnt(dst.fp(), src.fp(), kScratchDoubleReg,
              liftoff::kScratchDoubleReg2, kScratchRegister);
}
2503
// i8x16.splat: broadcast the low 8 bits of {src} to all 16 byte lanes.
void LiftoffAssembler::emit_i8x16_splat(LiftoffRegister dst,
                                        LiftoffRegister src) {
  I8x16Splat(dst.fp(), src.gp(), kScratchDoubleReg);
}

// i16x8.splat: broadcast the low 16 bits of {src} to all 8 lanes.
void LiftoffAssembler::emit_i16x8_splat(LiftoffRegister dst,
                                        LiftoffRegister src) {
  I16x8Splat(dst.fp(), src.gp());
}

// i32x4.splat: move to lane 0, then pshufd(0) replicates it to all 4 lanes.
void LiftoffAssembler::emit_i32x4_splat(LiftoffRegister dst,
                                        LiftoffRegister src) {
  Movd(dst.fp(), src.gp());
  Pshufd(dst.fp(), dst.fp(), static_cast<uint8_t>(0));
}

// i64x2.splat: move to lane 0, then movddup duplicates the low 64 bits.
void LiftoffAssembler::emit_i64x2_splat(LiftoffRegister dst,
                                        LiftoffRegister src) {
  Movq(dst.fp(), src.gp());
  Movddup(dst.fp(), dst.fp());
}

// f32x4.splat, via the shared macro-assembler helper.
void LiftoffAssembler::emit_f32x4_splat(LiftoffRegister dst,
                                        LiftoffRegister src) {
  F32x4Splat(dst.fp(), src.fp());
}

// f64x2.splat: movddup broadcasts the low double of {src}.
void LiftoffAssembler::emit_f64x2_splat(LiftoffRegister dst,
                                        LiftoffRegister src) {
  Movddup(dst.fp(), src.fp());
}
2535
// i8x16.eq: lane-wise byte equality (all-ones per equal lane).
void LiftoffAssembler::emit_i8x16_eq(LiftoffRegister dst, LiftoffRegister lhs,
                                     LiftoffRegister rhs) {
  liftoff::EmitSimdCommutativeBinOp<&Assembler::vpcmpeqb, &Assembler::pcmpeqb>(
      this, dst, lhs, rhs);
}

// i8x16.ne: compute equality, then invert the mask by XOR-ing with
// all-ones (pcmpeqb of a register with itself yields all-ones).
void LiftoffAssembler::emit_i8x16_ne(LiftoffRegister dst, LiftoffRegister lhs,
                                     LiftoffRegister rhs) {
  liftoff::EmitSimdCommutativeBinOp<&Assembler::vpcmpeqb, &Assembler::pcmpeqb>(
      this, dst, lhs, rhs);
  Pcmpeqb(kScratchDoubleReg, kScratchDoubleReg);
  Pxor(dst.fp(), kScratchDoubleReg);
}

// i8x16.gt_s: signed byte greater-than; non-commutative, so operand order
// (and any needed swap/copy) is handled by the helper.
void LiftoffAssembler::emit_i8x16_gt_s(LiftoffRegister dst, LiftoffRegister lhs,
                                       LiftoffRegister rhs) {
  liftoff::EmitSimdNonCommutativeBinOp<&Assembler::vpcmpgtb,
                                       &Assembler::pcmpgtb>(this, dst, lhs,
                                                           rhs);
}
2556
emit_i8x16_gt_u(LiftoffRegister dst,LiftoffRegister lhs,LiftoffRegister rhs)2557 void LiftoffAssembler::emit_i8x16_gt_u(LiftoffRegister dst, LiftoffRegister lhs,
2558 LiftoffRegister rhs) {
2559 DoubleRegister ref = rhs.fp();
2560 if (dst == rhs) {
2561 Movaps(kScratchDoubleReg, rhs.fp());
2562 ref = kScratchDoubleReg;
2563 }
2564 liftoff::EmitSimdCommutativeBinOp<&Assembler::vpmaxub, &Assembler::pmaxub>(
2565 this, dst, lhs, rhs, SSE4_1);
2566 Pcmpeqb(dst.fp(), ref);
2567 Pcmpeqb(kScratchDoubleReg, kScratchDoubleReg);
2568 Pxor(dst.fp(), kScratchDoubleReg);
2569 }
2570
// i8x16.ge_s: min_s(lhs, rhs) == rhs holds iff rhs <= lhs (signed), i.e.
// lhs >= rhs. Copy rhs aside if dst aliases it, since the min writes dst
// before the comparison reads the reference value.
void LiftoffAssembler::emit_i8x16_ge_s(LiftoffRegister dst, LiftoffRegister lhs,
                                       LiftoffRegister rhs) {
  DoubleRegister ref = rhs.fp();
  if (dst == rhs) {
    Movaps(kScratchDoubleReg, rhs.fp());
    ref = kScratchDoubleReg;
  }
  // pminsb needs SSE4.1 on the non-AVX path.
  liftoff::EmitSimdCommutativeBinOp<&Assembler::vpminsb, &Assembler::pminsb>(
      this, dst, lhs, rhs, SSE4_1);
  Pcmpeqb(dst.fp(), ref);
}

// i8x16.ge_u: min_u(lhs, rhs) == rhs holds iff lhs >= rhs (unsigned).
// pminub is SSE2, so no feature scope is required.
void LiftoffAssembler::emit_i8x16_ge_u(LiftoffRegister dst, LiftoffRegister lhs,
                                       LiftoffRegister rhs) {
  DoubleRegister ref = rhs.fp();
  if (dst == rhs) {
    Movaps(kScratchDoubleReg, rhs.fp());
    ref = kScratchDoubleReg;
  }
  liftoff::EmitSimdCommutativeBinOp<&Assembler::vpminub, &Assembler::pminub>(
      this, dst, lhs, rhs);
  Pcmpeqb(dst.fp(), ref);
}
2594
// i16x8.eq: lane-wise 16-bit equality.
void LiftoffAssembler::emit_i16x8_eq(LiftoffRegister dst, LiftoffRegister lhs,
                                     LiftoffRegister rhs) {
  liftoff::EmitSimdCommutativeBinOp<&Assembler::vpcmpeqw, &Assembler::pcmpeqw>(
      this, dst, lhs, rhs);
}

// i16x8.ne: equality followed by a bitwise inversion (XOR with all-ones).
void LiftoffAssembler::emit_i16x8_ne(LiftoffRegister dst, LiftoffRegister lhs,
                                     LiftoffRegister rhs) {
  liftoff::EmitSimdCommutativeBinOp<&Assembler::vpcmpeqw, &Assembler::pcmpeqw>(
      this, dst, lhs, rhs);
  Pcmpeqw(kScratchDoubleReg, kScratchDoubleReg);
  Pxor(dst.fp(), kScratchDoubleReg);
}

// i16x8.gt_s: signed 16-bit greater-than (non-commutative).
void LiftoffAssembler::emit_i16x8_gt_s(LiftoffRegister dst, LiftoffRegister lhs,
                                       LiftoffRegister rhs) {
  liftoff::EmitSimdNonCommutativeBinOp<&Assembler::vpcmpgtw,
                                       &Assembler::pcmpgtw>(this, dst, lhs,
                                                            rhs);
}
2615
emit_i16x8_gt_u(LiftoffRegister dst,LiftoffRegister lhs,LiftoffRegister rhs)2616 void LiftoffAssembler::emit_i16x8_gt_u(LiftoffRegister dst, LiftoffRegister lhs,
2617 LiftoffRegister rhs) {
2618 DoubleRegister ref = rhs.fp();
2619 if (dst == rhs) {
2620 Movaps(kScratchDoubleReg, rhs.fp());
2621 ref = kScratchDoubleReg;
2622 }
2623 liftoff::EmitSimdCommutativeBinOp<&Assembler::vpmaxuw, &Assembler::pmaxuw>(
2624 this, dst, lhs, rhs);
2625 Pcmpeqw(dst.fp(), ref);
2626 Pcmpeqw(kScratchDoubleReg, kScratchDoubleReg);
2627 Pxor(dst.fp(), kScratchDoubleReg);
2628 }
2629
// i16x8.ge_s: min_s(lhs, rhs) == rhs holds iff lhs >= rhs (signed).
// pminsw is SSE2, so no feature scope is required.
void LiftoffAssembler::emit_i16x8_ge_s(LiftoffRegister dst, LiftoffRegister lhs,
                                       LiftoffRegister rhs) {
  DoubleRegister ref = rhs.fp();
  if (dst == rhs) {
    Movaps(kScratchDoubleReg, rhs.fp());
    ref = kScratchDoubleReg;
  }
  liftoff::EmitSimdCommutativeBinOp<&Assembler::vpminsw, &Assembler::pminsw>(
      this, dst, lhs, rhs);
  Pcmpeqw(dst.fp(), ref);
}

// i16x8.ge_u: min_u(lhs, rhs) == rhs holds iff lhs >= rhs (unsigned).
// pminuw needs SSE4.1 on the non-AVX path.
void LiftoffAssembler::emit_i16x8_ge_u(LiftoffRegister dst, LiftoffRegister lhs,
                                       LiftoffRegister rhs) {
  DoubleRegister ref = rhs.fp();
  if (dst == rhs) {
    Movaps(kScratchDoubleReg, rhs.fp());
    ref = kScratchDoubleReg;
  }
  liftoff::EmitSimdCommutativeBinOp<&Assembler::vpminuw, &Assembler::pminuw>(
      this, dst, lhs, rhs, SSE4_1);
  Pcmpeqw(dst.fp(), ref);
}
2653
// i32x4.eq: lane-wise 32-bit equality.
void LiftoffAssembler::emit_i32x4_eq(LiftoffRegister dst, LiftoffRegister lhs,
                                     LiftoffRegister rhs) {
  liftoff::EmitSimdCommutativeBinOp<&Assembler::vpcmpeqd, &Assembler::pcmpeqd>(
      this, dst, lhs, rhs);
}

// i32x4.ne: equality followed by a bitwise inversion (XOR with all-ones).
void LiftoffAssembler::emit_i32x4_ne(LiftoffRegister dst, LiftoffRegister lhs,
                                     LiftoffRegister rhs) {
  liftoff::EmitSimdCommutativeBinOp<&Assembler::vpcmpeqd, &Assembler::pcmpeqd>(
      this, dst, lhs, rhs);
  Pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
  Pxor(dst.fp(), kScratchDoubleReg);
}

// i32x4.gt_s: signed 32-bit greater-than (non-commutative).
void LiftoffAssembler::emit_i32x4_gt_s(LiftoffRegister dst, LiftoffRegister lhs,
                                       LiftoffRegister rhs) {
  liftoff::EmitSimdNonCommutativeBinOp<&Assembler::vpcmpgtd,
                                       &Assembler::pcmpgtd>(this, dst, lhs,
                                                            rhs);
}
2674
// i32x4.gt_u: max_u(lhs, rhs) == rhs holds iff lhs <= rhs (unsigned);
// invert the mask for strict greater-than. Copy rhs aside if dst aliases
// it. pmaxud needs SSE4.1 on the non-AVX path.
void LiftoffAssembler::emit_i32x4_gt_u(LiftoffRegister dst, LiftoffRegister lhs,
                                       LiftoffRegister rhs) {
  DoubleRegister ref = rhs.fp();
  if (dst == rhs) {
    Movaps(kScratchDoubleReg, rhs.fp());
    ref = kScratchDoubleReg;
  }
  liftoff::EmitSimdCommutativeBinOp<&Assembler::vpmaxud, &Assembler::pmaxud>(
      this, dst, lhs, rhs, SSE4_1);
  Pcmpeqd(dst.fp(), ref);
  Pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
  Pxor(dst.fp(), kScratchDoubleReg);
}

// i32x4.ge_s: min_s(lhs, rhs) == rhs holds iff lhs >= rhs (signed).
// pminsd needs SSE4.1 on the non-AVX path.
void LiftoffAssembler::emit_i32x4_ge_s(LiftoffRegister dst, LiftoffRegister lhs,
                                       LiftoffRegister rhs) {
  DoubleRegister ref = rhs.fp();
  if (dst == rhs) {
    Movaps(kScratchDoubleReg, rhs.fp());
    ref = kScratchDoubleReg;
  }
  liftoff::EmitSimdCommutativeBinOp<&Assembler::vpminsd, &Assembler::pminsd>(
      this, dst, lhs, rhs, SSE4_1);
  Pcmpeqd(dst.fp(), ref);
}

// i32x4.ge_u: min_u(lhs, rhs) == rhs holds iff lhs >= rhs (unsigned).
// pminud needs SSE4.1 on the non-AVX path.
void LiftoffAssembler::emit_i32x4_ge_u(LiftoffRegister dst, LiftoffRegister lhs,
                                       LiftoffRegister rhs) {
  DoubleRegister ref = rhs.fp();
  if (dst == rhs) {
    Movaps(kScratchDoubleReg, rhs.fp());
    ref = kScratchDoubleReg;
  }
  liftoff::EmitSimdCommutativeBinOp<&Assembler::vpminud, &Assembler::pminud>(
      this, dst, lhs, rhs, SSE4_1);
  Pcmpeqd(dst.fp(), ref);
}
2712
// i64x2.eq: lane-wise 64-bit equality; pcmpeqq needs SSE4.1 on the
// non-AVX path.
void LiftoffAssembler::emit_i64x2_eq(LiftoffRegister dst, LiftoffRegister lhs,
                                     LiftoffRegister rhs) {
  liftoff::EmitSimdCommutativeBinOp<&Assembler::vpcmpeqq, &Assembler::pcmpeqq>(
      this, dst, lhs, rhs, SSE4_1);
}

// i64x2.ne: equality followed by a bitwise inversion (XOR with all-ones).
void LiftoffAssembler::emit_i64x2_ne(LiftoffRegister dst, LiftoffRegister lhs,
                                     LiftoffRegister rhs) {
  liftoff::EmitSimdCommutativeBinOp<&Assembler::vpcmpeqq, &Assembler::pcmpeqq>(
      this, dst, lhs, rhs, SSE4_1);
  Pcmpeqq(kScratchDoubleReg, kScratchDoubleReg);
  Pxor(dst.fp(), kScratchDoubleReg);
}
2726
// i64x2.gt_s: delegates to the macro-assembler helper, which selects the
// instruction sequence based on available CPU features.
void LiftoffAssembler::emit_i64x2_gt_s(LiftoffRegister dst, LiftoffRegister lhs,
                                       LiftoffRegister rhs) {
  // Different register alias requirements depending on CpuFeatures supported:
  if (CpuFeatures::IsSupported(AVX) || CpuFeatures::IsSupported(SSE4_2)) {
    // 1. AVX, or SSE4_2 no requirements (I64x2GtS takes care of aliasing).
    I64x2GtS(dst.fp(), lhs.fp(), rhs.fp(), kScratchDoubleReg);
  } else {
    // 2. Else, dst != lhs && dst != rhs (lhs == rhs is ok).
    if (dst == lhs || dst == rhs) {
      // Compute into the second scratch xmm, then copy to dst, so the
      // helper never sees dst aliasing an input.
      I64x2GtS(liftoff::kScratchDoubleReg2, lhs.fp(), rhs.fp(),
               kScratchDoubleReg);
      movaps(dst.fp(), liftoff::kScratchDoubleReg2);
    } else {
      I64x2GtS(dst.fp(), lhs.fp(), rhs.fp(), kScratchDoubleReg);
    }
  }
}
2744
// i64x2.ge_s: like emit_i64x2_gt_s, but the helper's aliasing constraints
// differ per feature level, so three cases are distinguished.
void LiftoffAssembler::emit_i64x2_ge_s(LiftoffRegister dst, LiftoffRegister lhs,
                                       LiftoffRegister rhs) {
  // Different register alias requirements depending on CpuFeatures supported:
  if (CpuFeatures::IsSupported(AVX)) {
    // 1. AVX, no requirements.
    I64x2GeS(dst.fp(), lhs.fp(), rhs.fp(), kScratchDoubleReg);
  } else if (CpuFeatures::IsSupported(SSE4_2)) {
    // 2. SSE4_2, dst != lhs.
    if (dst == lhs) {
      // Detour through the second scratch xmm to break the alias.
      I64x2GeS(liftoff::kScratchDoubleReg2, lhs.fp(), rhs.fp(),
               kScratchDoubleReg);
      movaps(dst.fp(), liftoff::kScratchDoubleReg2);
    } else {
      I64x2GeS(dst.fp(), lhs.fp(), rhs.fp(), kScratchDoubleReg);
    }
  } else {
    // 3. Else, dst != lhs && dst != rhs (lhs == rhs is ok).
    if (dst == lhs || dst == rhs) {
      I64x2GeS(liftoff::kScratchDoubleReg2, lhs.fp(), rhs.fp(),
               kScratchDoubleReg);
      movaps(dst.fp(), liftoff::kScratchDoubleReg2);
    } else {
      I64x2GeS(dst.fp(), lhs.fp(), rhs.fp(), kScratchDoubleReg);
    }
  }
}
2771
// f32x4.eq: lane-wise float equality (cmpps with EQ predicate).
void LiftoffAssembler::emit_f32x4_eq(LiftoffRegister dst, LiftoffRegister lhs,
                                     LiftoffRegister rhs) {
  liftoff::EmitSimdCommutativeBinOp<&Assembler::vcmpeqps, &Assembler::cmpeqps>(
      this, dst, lhs, rhs);
}

// f32x4.ne: lane-wise float inequality (NEQ predicate).
void LiftoffAssembler::emit_f32x4_ne(LiftoffRegister dst, LiftoffRegister lhs,
                                     LiftoffRegister rhs) {
  liftoff::EmitSimdCommutativeBinOp<&Assembler::vcmpneqps,
                                    &Assembler::cmpneqps>(this, dst, lhs, rhs);
}

// f32x4.lt: lane-wise float less-than (non-commutative).
void LiftoffAssembler::emit_f32x4_lt(LiftoffRegister dst, LiftoffRegister lhs,
                                     LiftoffRegister rhs) {
  liftoff::EmitSimdNonCommutativeBinOp<&Assembler::vcmpltps,
                                       &Assembler::cmpltps>(this, dst, lhs,
                                                            rhs);
}

// f32x4.le: lane-wise float less-or-equal (non-commutative).
void LiftoffAssembler::emit_f32x4_le(LiftoffRegister dst, LiftoffRegister lhs,
                                     LiftoffRegister rhs) {
  liftoff::EmitSimdNonCommutativeBinOp<&Assembler::vcmpleps,
                                       &Assembler::cmpleps>(this, dst, lhs,
                                                            rhs);
}

// f64x2.eq: lane-wise double equality.
void LiftoffAssembler::emit_f64x2_eq(LiftoffRegister dst, LiftoffRegister lhs,
                                     LiftoffRegister rhs) {
  liftoff::EmitSimdCommutativeBinOp<&Assembler::vcmpeqpd, &Assembler::cmpeqpd>(
      this, dst, lhs, rhs);
}

// f64x2.ne: lane-wise double inequality.
void LiftoffAssembler::emit_f64x2_ne(LiftoffRegister dst, LiftoffRegister lhs,
                                     LiftoffRegister rhs) {
  liftoff::EmitSimdCommutativeBinOp<&Assembler::vcmpneqpd,
                                    &Assembler::cmpneqpd>(this, dst, lhs, rhs);
}

// f64x2.lt: lane-wise double less-than (non-commutative).
void LiftoffAssembler::emit_f64x2_lt(LiftoffRegister dst, LiftoffRegister lhs,
                                     LiftoffRegister rhs) {
  liftoff::EmitSimdNonCommutativeBinOp<&Assembler::vcmpltpd,
                                       &Assembler::cmpltpd>(this, dst, lhs,
                                                            rhs);
}

// f64x2.le: lane-wise double less-or-equal (non-commutative).
void LiftoffAssembler::emit_f64x2_le(LiftoffRegister dst, LiftoffRegister lhs,
                                     LiftoffRegister rhs) {
  liftoff::EmitSimdNonCommutativeBinOp<&Assembler::vcmplepd,
                                       &Assembler::cmplepd>(this, dst, lhs,
                                                            rhs);
}
2823
// v128.const: load the 16 immediate bytes as two little-endian 64-bit
// halves (vals[0] = low quadword) and materialize them into dst.
void LiftoffAssembler::emit_s128_const(LiftoffRegister dst,
                                       const uint8_t imms[16]) {
  uint64_t vals[2];
  memcpy(vals, imms, sizeof(vals));
  TurboAssembler::Move(dst.fp(), vals[1], vals[0]);
}

// v128.not: bitwise complement (helper XORs with all-ones).
void LiftoffAssembler::emit_s128_not(LiftoffRegister dst, LiftoffRegister src) {
  S128Not(dst.fp(), src.fp(), kScratchDoubleReg);
}

// v128.and: bitwise AND.
void LiftoffAssembler::emit_s128_and(LiftoffRegister dst, LiftoffRegister lhs,
                                     LiftoffRegister rhs) {
  liftoff::EmitSimdCommutativeBinOp<&Assembler::vpand, &Assembler::pand>(
      this, dst, lhs, rhs);
}

// v128.or: bitwise OR.
void LiftoffAssembler::emit_s128_or(LiftoffRegister dst, LiftoffRegister lhs,
                                    LiftoffRegister rhs) {
  liftoff::EmitSimdCommutativeBinOp<&Assembler::vpor, &Assembler::por>(
      this, dst, lhs, rhs);
}

// v128.xor: bitwise XOR.
void LiftoffAssembler::emit_s128_xor(LiftoffRegister dst, LiftoffRegister lhs,
                                     LiftoffRegister rhs) {
  liftoff::EmitSimdCommutativeBinOp<&Assembler::vpxor, &Assembler::pxor>(
      this, dst, lhs, rhs);
}
2852
// v128.bitselect: dst = (src1 & mask) | (src2 & ~mask).
// The register allocator guarantees dst does not alias src1/src2; on the
// non-AVX path the mask is first copied into dst because the SSE sequence
// in S128Select destroys its mask operand.
void LiftoffAssembler::emit_s128_select(LiftoffRegister dst,
                                        LiftoffRegister src1,
                                        LiftoffRegister src2,
                                        LiftoffRegister mask) {
  // Ensure that we don't overwrite any inputs with the movaps below.
  DCHECK_NE(dst, src1);
  DCHECK_NE(dst, src2);
  if (!CpuFeatures::IsSupported(AVX) && dst != mask) {
    movaps(dst.fp(), mask.fp());
    S128Select(dst.fp(), dst.fp(), src1.fp(), src2.fp(), kScratchDoubleReg);
  } else {
    S128Select(dst.fp(), mask.fp(), src1.fp(), src2.fp(), kScratchDoubleReg);
  }
}
2867
// i8x16.neg: if dst aliases src, negate in place via psignb with an
// all-ones (-1 per lane) operand, which flips the sign of every lane while
// leaving zero lanes zero. Otherwise compute 0 - src.
void LiftoffAssembler::emit_i8x16_neg(LiftoffRegister dst,
                                      LiftoffRegister src) {
  if (dst.fp() == src.fp()) {
    Pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
    Psignb(dst.fp(), kScratchDoubleReg);
  } else {
    Pxor(dst.fp(), dst.fp());
    Psubb(dst.fp(), src.fp());
  }
}

// v128.any_true: 1 if any bit of src is set, else 0.
void LiftoffAssembler::emit_v128_anytrue(LiftoffRegister dst,
                                         LiftoffRegister src) {
  liftoff::EmitAnyTrue(this, dst, src);
}

// i8x16.all_true: 1 if every byte lane is non-zero; the helper is
// parameterized by the per-lane-width equality instruction.
void LiftoffAssembler::emit_i8x16_alltrue(LiftoffRegister dst,
                                          LiftoffRegister src) {
  liftoff::EmitAllTrue<&TurboAssembler::Pcmpeqb>(this, dst, src);
}

// i8x16.bitmask: collect the sign bit of each of the 16 byte lanes into
// the low 16 bits of the gp dst.
void LiftoffAssembler::emit_i8x16_bitmask(LiftoffRegister dst,
                                          LiftoffRegister src) {
  Pmovmskb(dst.gp(), src.fp());
}
2893
// i8x16.shl (variable): no native byte shift on x64; the helper synthesizes
// it using gp and xmm scratch registers.
void LiftoffAssembler::emit_i8x16_shl(LiftoffRegister dst, LiftoffRegister lhs,
                                      LiftoffRegister rhs) {
  I8x16Shl(dst.fp(), lhs.fp(), rhs.gp(), kScratchRegister, kScratchDoubleReg,
           liftoff::kScratchDoubleReg2);
}

// i8x16.shl (immediate shift count).
void LiftoffAssembler::emit_i8x16_shli(LiftoffRegister dst, LiftoffRegister lhs,
                                       int32_t rhs) {
  I8x16Shl(dst.fp(), lhs.fp(), rhs, kScratchRegister, kScratchDoubleReg);
}

// i8x16.shr_s (variable): arithmetic right shift, synthesized.
void LiftoffAssembler::emit_i8x16_shr_s(LiftoffRegister dst,
                                        LiftoffRegister lhs,
                                        LiftoffRegister rhs) {
  I8x16ShrS(dst.fp(), lhs.fp(), rhs.gp(), kScratchRegister, kScratchDoubleReg,
            liftoff::kScratchDoubleReg2);
}

// i8x16.shr_s (immediate shift count).
void LiftoffAssembler::emit_i8x16_shri_s(LiftoffRegister dst,
                                         LiftoffRegister lhs, int32_t rhs) {
  I8x16ShrS(dst.fp(), lhs.fp(), rhs, kScratchDoubleReg);
}

// i8x16.shr_u (variable): logical right shift, synthesized.
void LiftoffAssembler::emit_i8x16_shr_u(LiftoffRegister dst,
                                        LiftoffRegister lhs,
                                        LiftoffRegister rhs) {
  I8x16ShrU(dst.fp(), lhs.fp(), rhs.gp(), kScratchRegister, kScratchDoubleReg,
            liftoff::kScratchDoubleReg2);
}

// i8x16.shr_u (immediate shift count).
void LiftoffAssembler::emit_i8x16_shri_u(LiftoffRegister dst,
                                         LiftoffRegister lhs, int32_t rhs) {
  I8x16ShrU(dst.fp(), lhs.fp(), rhs, kScratchRegister, kScratchDoubleReg);
}
2928
// i8x16.add: wrapping byte addition.
void LiftoffAssembler::emit_i8x16_add(LiftoffRegister dst, LiftoffRegister lhs,
                                      LiftoffRegister rhs) {
  liftoff::EmitSimdCommutativeBinOp<&Assembler::vpaddb, &Assembler::paddb>(
      this, dst, lhs, rhs);
}

// i8x16.add_sat_s: signed saturating byte addition.
void LiftoffAssembler::emit_i8x16_add_sat_s(LiftoffRegister dst,
                                            LiftoffRegister lhs,
                                            LiftoffRegister rhs) {
  liftoff::EmitSimdCommutativeBinOp<&Assembler::vpaddsb, &Assembler::paddsb>(
      this, dst, lhs, rhs);
}

// i8x16.add_sat_u: unsigned saturating byte addition.
void LiftoffAssembler::emit_i8x16_add_sat_u(LiftoffRegister dst,
                                            LiftoffRegister lhs,
                                            LiftoffRegister rhs) {
  liftoff::EmitSimdCommutativeBinOp<&Assembler::vpaddusb, &Assembler::paddusb>(
      this, dst, lhs, rhs);
}

// i8x16.sub: wrapping byte subtraction (non-commutative).
void LiftoffAssembler::emit_i8x16_sub(LiftoffRegister dst, LiftoffRegister lhs,
                                      LiftoffRegister rhs) {
  liftoff::EmitSimdNonCommutativeBinOp<&Assembler::vpsubb, &Assembler::psubb>(
      this, dst, lhs, rhs);
}

// i8x16.sub_sat_s: signed saturating byte subtraction.
void LiftoffAssembler::emit_i8x16_sub_sat_s(LiftoffRegister dst,
                                            LiftoffRegister lhs,
                                            LiftoffRegister rhs) {
  liftoff::EmitSimdNonCommutativeBinOp<&Assembler::vpsubsb, &Assembler::psubsb>(
      this, dst, lhs, rhs);
}

// i8x16.sub_sat_u: unsigned saturating byte subtraction.
void LiftoffAssembler::emit_i8x16_sub_sat_u(LiftoffRegister dst,
                                            LiftoffRegister lhs,
                                            LiftoffRegister rhs) {
  liftoff::EmitSimdNonCommutativeBinOp<&Assembler::vpsubusb,
                                       &Assembler::psubusb>(this, dst, lhs,
                                                            rhs);
}

// i8x16.min_s: signed byte minimum; pminsb needs SSE4.1 on the non-AVX path.
void LiftoffAssembler::emit_i8x16_min_s(LiftoffRegister dst,
                                        LiftoffRegister lhs,
                                        LiftoffRegister rhs) {
  liftoff::EmitSimdCommutativeBinOp<&Assembler::vpminsb, &Assembler::pminsb>(
      this, dst, lhs, rhs, base::Optional<CpuFeature>(SSE4_1));
}

// i8x16.min_u: unsigned byte minimum (pminub is SSE2).
void LiftoffAssembler::emit_i8x16_min_u(LiftoffRegister dst,
                                        LiftoffRegister lhs,
                                        LiftoffRegister rhs) {
  liftoff::EmitSimdCommutativeBinOp<&Assembler::vpminub, &Assembler::pminub>(
      this, dst, lhs, rhs);
}

// i8x16.max_s: signed byte maximum; pmaxsb needs SSE4.1 on the non-AVX path.
void LiftoffAssembler::emit_i8x16_max_s(LiftoffRegister dst,
                                        LiftoffRegister lhs,
                                        LiftoffRegister rhs) {
  liftoff::EmitSimdCommutativeBinOp<&Assembler::vpmaxsb, &Assembler::pmaxsb>(
      this, dst, lhs, rhs, base::Optional<CpuFeature>(SSE4_1));
}

// i8x16.max_u: unsigned byte maximum (pmaxub is SSE2).
void LiftoffAssembler::emit_i8x16_max_u(LiftoffRegister dst,
                                        LiftoffRegister lhs,
                                        LiftoffRegister rhs) {
  liftoff::EmitSimdCommutativeBinOp<&Assembler::vpmaxub, &Assembler::pmaxub>(
      this, dst, lhs, rhs);
}
2997
// i16x8.neg: in-place negation via psignw with an all-ones (-1 per lane)
// operand when dst aliases src; otherwise 0 - src.
void LiftoffAssembler::emit_i16x8_neg(LiftoffRegister dst,
                                      LiftoffRegister src) {
  if (dst.fp() == src.fp()) {
    Pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
    Psignw(dst.fp(), kScratchDoubleReg);
  } else {
    Pxor(dst.fp(), dst.fp());
    Psubw(dst.fp(), src.fp());
  }
}

// i16x8.all_true: 1 if every 16-bit lane is non-zero.
void LiftoffAssembler::emit_i16x8_alltrue(LiftoffRegister dst,
                                          LiftoffRegister src) {
  liftoff::EmitAllTrue<&TurboAssembler::Pcmpeqw>(this, dst, src);
}

// i16x8.bitmask: pack the eight signed words into the high 8 bytes of the
// scratch register (the low 8 bytes hold stale scratch data), take the
// 16-bit byte sign mask, then shift out the low 8 junk bits.
void LiftoffAssembler::emit_i16x8_bitmask(LiftoffRegister dst,
                                          LiftoffRegister src) {
  XMMRegister tmp = kScratchDoubleReg;
  Packsswb(tmp, src.fp());
  Pmovmskb(dst.gp(), tmp);
  shrq(dst.gp(), Immediate(8));
}
3021
// i16x8 shifts: the template argument 4 is the number of significant shift
// bits, so the shift count is masked to [0, 15] as the wasm spec requires.

// i16x8.shl (variable).
void LiftoffAssembler::emit_i16x8_shl(LiftoffRegister dst, LiftoffRegister lhs,
                                      LiftoffRegister rhs) {
  liftoff::EmitSimdShiftOp<&Assembler::vpsllw, &Assembler::psllw, 4>(this, dst,
                                                                     lhs, rhs);
}

// i16x8.shl (immediate).
void LiftoffAssembler::emit_i16x8_shli(LiftoffRegister dst, LiftoffRegister lhs,
                                       int32_t rhs) {
  liftoff::EmitSimdShiftOpImm<&Assembler::vpsllw, &Assembler::psllw, 4>(
      this, dst, lhs, rhs);
}

// i16x8.shr_s (variable): arithmetic right shift.
void LiftoffAssembler::emit_i16x8_shr_s(LiftoffRegister dst,
                                        LiftoffRegister lhs,
                                        LiftoffRegister rhs) {
  liftoff::EmitSimdShiftOp<&Assembler::vpsraw, &Assembler::psraw, 4>(this, dst,
                                                                     lhs, rhs);
}

// i16x8.shr_s (immediate).
void LiftoffAssembler::emit_i16x8_shri_s(LiftoffRegister dst,
                                         LiftoffRegister lhs, int32_t rhs) {
  liftoff::EmitSimdShiftOpImm<&Assembler::vpsraw, &Assembler::psraw, 4>(
      this, dst, lhs, rhs);
}

// i16x8.shr_u (variable): logical right shift.
void LiftoffAssembler::emit_i16x8_shr_u(LiftoffRegister dst,
                                        LiftoffRegister lhs,
                                        LiftoffRegister rhs) {
  liftoff::EmitSimdShiftOp<&Assembler::vpsrlw, &Assembler::psrlw, 4>(this, dst,
                                                                     lhs, rhs);
}

// i16x8.shr_u (immediate).
void LiftoffAssembler::emit_i16x8_shri_u(LiftoffRegister dst,
                                         LiftoffRegister lhs, int32_t rhs) {
  liftoff::EmitSimdShiftOpImm<&Assembler::vpsrlw, &Assembler::psrlw, 4>(
      this, dst, lhs, rhs);
}
3059
// i16x8.add: wrapping 16-bit addition.
void LiftoffAssembler::emit_i16x8_add(LiftoffRegister dst, LiftoffRegister lhs,
                                      LiftoffRegister rhs) {
  liftoff::EmitSimdCommutativeBinOp<&Assembler::vpaddw, &Assembler::paddw>(
      this, dst, lhs, rhs);
}

// i16x8.add_sat_s: signed saturating addition.
void LiftoffAssembler::emit_i16x8_add_sat_s(LiftoffRegister dst,
                                            LiftoffRegister lhs,
                                            LiftoffRegister rhs) {
  liftoff::EmitSimdCommutativeBinOp<&Assembler::vpaddsw, &Assembler::paddsw>(
      this, dst, lhs, rhs);
}

// i16x8.add_sat_u: unsigned saturating addition.
void LiftoffAssembler::emit_i16x8_add_sat_u(LiftoffRegister dst,
                                            LiftoffRegister lhs,
                                            LiftoffRegister rhs) {
  liftoff::EmitSimdCommutativeBinOp<&Assembler::vpaddusw, &Assembler::paddusw>(
      this, dst, lhs, rhs);
}

// i16x8.sub: wrapping 16-bit subtraction (non-commutative).
void LiftoffAssembler::emit_i16x8_sub(LiftoffRegister dst, LiftoffRegister lhs,
                                      LiftoffRegister rhs) {
  liftoff::EmitSimdNonCommutativeBinOp<&Assembler::vpsubw, &Assembler::psubw>(
      this, dst, lhs, rhs);
}

// i16x8.sub_sat_s: signed saturating subtraction.
void LiftoffAssembler::emit_i16x8_sub_sat_s(LiftoffRegister dst,
                                            LiftoffRegister lhs,
                                            LiftoffRegister rhs) {
  liftoff::EmitSimdNonCommutativeBinOp<&Assembler::vpsubsw, &Assembler::psubsw>(
      this, dst, lhs, rhs);
}

// i16x8.sub_sat_u: unsigned saturating subtraction.
void LiftoffAssembler::emit_i16x8_sub_sat_u(LiftoffRegister dst,
                                            LiftoffRegister lhs,
                                            LiftoffRegister rhs) {
  liftoff::EmitSimdNonCommutativeBinOp<&Assembler::vpsubusw,
                                       &Assembler::psubusw>(this, dst, lhs,
                                                            rhs);
}

// i16x8.mul: low 16 bits of the 16x16 product per lane.
void LiftoffAssembler::emit_i16x8_mul(LiftoffRegister dst, LiftoffRegister lhs,
                                      LiftoffRegister rhs) {
  liftoff::EmitSimdCommutativeBinOp<&Assembler::vpmullw, &Assembler::pmullw>(
      this, dst, lhs, rhs);
}

// i16x8.min_s: signed minimum (pminsw is SSE2).
void LiftoffAssembler::emit_i16x8_min_s(LiftoffRegister dst,
                                        LiftoffRegister lhs,
                                        LiftoffRegister rhs) {
  liftoff::EmitSimdCommutativeBinOp<&Assembler::vpminsw, &Assembler::pminsw>(
      this, dst, lhs, rhs);
}

// i16x8.min_u: unsigned minimum; pminuw needs SSE4.1 on the non-AVX path.
void LiftoffAssembler::emit_i16x8_min_u(LiftoffRegister dst,
                                        LiftoffRegister lhs,
                                        LiftoffRegister rhs) {
  liftoff::EmitSimdCommutativeBinOp<&Assembler::vpminuw, &Assembler::pminuw>(
      this, dst, lhs, rhs, base::Optional<CpuFeature>(SSE4_1));
}

// i16x8.max_s: signed maximum (pmaxsw is SSE2).
void LiftoffAssembler::emit_i16x8_max_s(LiftoffRegister dst,
                                        LiftoffRegister lhs,
                                        LiftoffRegister rhs) {
  liftoff::EmitSimdCommutativeBinOp<&Assembler::vpmaxsw, &Assembler::pmaxsw>(
      this, dst, lhs, rhs);
}

// i16x8.max_u: unsigned maximum; pmaxuw needs SSE4.1 on the non-AVX path.
void LiftoffAssembler::emit_i16x8_max_u(LiftoffRegister dst,
                                        LiftoffRegister lhs,
                                        LiftoffRegister rhs) {
  liftoff::EmitSimdCommutativeBinOp<&Assembler::vpmaxuw, &Assembler::pmaxuw>(
      this, dst, lhs, rhs, base::Optional<CpuFeature>(SSE4_1));
}
3134
// i16x8.extadd_pairwise_i8x16_s: sum adjacent signed byte pairs into
// 16-bit lanes (delegated to the macro-assembler helper).
void LiftoffAssembler::emit_i16x8_extadd_pairwise_i8x16_s(LiftoffRegister dst,
                                                          LiftoffRegister src) {
  I16x8ExtAddPairwiseI8x16S(dst.fp(), src.fp(), kScratchDoubleReg,
                            kScratchRegister);
}

// i16x8.extadd_pairwise_i8x16_u: unsigned variant.
void LiftoffAssembler::emit_i16x8_extadd_pairwise_i8x16_u(LiftoffRegister dst,
                                                          LiftoffRegister src) {
  I16x8ExtAddPairwiseI8x16U(dst.fp(), src.fp(), kScratchRegister);
}

// i16x8.extmul_low_i8x16_s: widen the low 8 signed bytes of each operand
// and multiply into 16-bit lanes.
void LiftoffAssembler::emit_i16x8_extmul_low_i8x16_s(LiftoffRegister dst,
                                                     LiftoffRegister src1,
                                                     LiftoffRegister src2) {
  I16x8ExtMulLow(dst.fp(), src1.fp(), src2.fp(), kScratchDoubleReg,
                 /*is_signed=*/true);
}

// i16x8.extmul_low_i8x16_u: unsigned variant.
void LiftoffAssembler::emit_i16x8_extmul_low_i8x16_u(LiftoffRegister dst,
                                                     LiftoffRegister src1,
                                                     LiftoffRegister src2) {
  I16x8ExtMulLow(dst.fp(), src1.fp(), src2.fp(), kScratchDoubleReg,
                 /*is_signed=*/false);
}

// i16x8.extmul_high_i8x16_s: same, for the high 8 bytes.
void LiftoffAssembler::emit_i16x8_extmul_high_i8x16_s(LiftoffRegister dst,
                                                      LiftoffRegister src1,
                                                      LiftoffRegister src2) {
  I16x8ExtMulHighS(dst.fp(), src1.fp(), src2.fp(), kScratchDoubleReg);
}

// i16x8.extmul_high_i8x16_u: unsigned variant.
void LiftoffAssembler::emit_i16x8_extmul_high_i8x16_u(LiftoffRegister dst,
                                                      LiftoffRegister src1,
                                                      LiftoffRegister src2) {
  I16x8ExtMulHighU(dst.fp(), src1.fp(), src2.fp(), kScratchDoubleReg);
}

// i16x8.q15mulr_sat_s: rounding Q15 multiply with signed saturation.
void LiftoffAssembler::emit_i16x8_q15mulr_sat_s(LiftoffRegister dst,
                                                LiftoffRegister src1,
                                                LiftoffRegister src2) {
  I16x8Q15MulRSatS(dst.fp(), src1.fp(), src2.fp(), kScratchDoubleReg);
}
3177
// i32x4.neg: in-place negation via psignd with an all-ones (-1 per lane)
// operand when dst aliases src; otherwise 0 - src.
void LiftoffAssembler::emit_i32x4_neg(LiftoffRegister dst,
                                      LiftoffRegister src) {
  if (dst.fp() == src.fp()) {
    Pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
    Psignd(dst.fp(), kScratchDoubleReg);
  } else {
    Pxor(dst.fp(), dst.fp());
    Psubd(dst.fp(), src.fp());
  }
}

// i32x4.all_true: 1 if every 32-bit lane is non-zero.
void LiftoffAssembler::emit_i32x4_alltrue(LiftoffRegister dst,
                                          LiftoffRegister src) {
  liftoff::EmitAllTrue<&TurboAssembler::Pcmpeqd>(this, dst, src);
}

// i32x4.bitmask: movmskps collects the sign bit of each 32-bit lane into
// the low 4 bits of the gp dst.
void LiftoffAssembler::emit_i32x4_bitmask(LiftoffRegister dst,
                                          LiftoffRegister src) {
  Movmskps(dst.gp(), src.fp());
}
3198
// i32x4 shifts: the template argument 5 is the number of significant shift
// bits, so the shift count is masked to [0, 31] as the wasm spec requires.

// i32x4.shl (variable).
void LiftoffAssembler::emit_i32x4_shl(LiftoffRegister dst, LiftoffRegister lhs,
                                      LiftoffRegister rhs) {
  liftoff::EmitSimdShiftOp<&Assembler::vpslld, &Assembler::pslld, 5>(this, dst,
                                                                     lhs, rhs);
}

// i32x4.shl (immediate).
void LiftoffAssembler::emit_i32x4_shli(LiftoffRegister dst, LiftoffRegister lhs,
                                       int32_t rhs) {
  liftoff::EmitSimdShiftOpImm<&Assembler::vpslld, &Assembler::pslld, 5>(
      this, dst, lhs, rhs);
}

// i32x4.shr_s (variable): arithmetic right shift.
void LiftoffAssembler::emit_i32x4_shr_s(LiftoffRegister dst,
                                        LiftoffRegister lhs,
                                        LiftoffRegister rhs) {
  liftoff::EmitSimdShiftOp<&Assembler::vpsrad, &Assembler::psrad, 5>(this, dst,
                                                                     lhs, rhs);
}

// i32x4.shr_s (immediate).
void LiftoffAssembler::emit_i32x4_shri_s(LiftoffRegister dst,
                                         LiftoffRegister lhs, int32_t rhs) {
  liftoff::EmitSimdShiftOpImm<&Assembler::vpsrad, &Assembler::psrad, 5>(
      this, dst, lhs, rhs);
}

// i32x4.shr_u (variable): logical right shift.
void LiftoffAssembler::emit_i32x4_shr_u(LiftoffRegister dst,
                                        LiftoffRegister lhs,
                                        LiftoffRegister rhs) {
  liftoff::EmitSimdShiftOp<&Assembler::vpsrld, &Assembler::psrld, 5>(this, dst,
                                                                     lhs, rhs);
}

// i32x4.shr_u (immediate).
void LiftoffAssembler::emit_i32x4_shri_u(LiftoffRegister dst,
                                         LiftoffRegister lhs, int32_t rhs) {
  liftoff::EmitSimdShiftOpImm<&Assembler::vpsrld, &Assembler::psrld, 5>(
      this, dst, lhs, rhs);
}
3236
emit_i32x4_add(LiftoffRegister dst,LiftoffRegister lhs,LiftoffRegister rhs)3237 void LiftoffAssembler::emit_i32x4_add(LiftoffRegister dst, LiftoffRegister lhs,
3238 LiftoffRegister rhs) {
3239 liftoff::EmitSimdCommutativeBinOp<&Assembler::vpaddd, &Assembler::paddd>(
3240 this, dst, lhs, rhs);
3241 }
3242
emit_i32x4_sub(LiftoffRegister dst,LiftoffRegister lhs,LiftoffRegister rhs)3243 void LiftoffAssembler::emit_i32x4_sub(LiftoffRegister dst, LiftoffRegister lhs,
3244 LiftoffRegister rhs) {
3245 liftoff::EmitSimdNonCommutativeBinOp<&Assembler::vpsubd, &Assembler::psubd>(
3246 this, dst, lhs, rhs);
3247 }
3248
emit_i32x4_mul(LiftoffRegister dst,LiftoffRegister lhs,LiftoffRegister rhs)3249 void LiftoffAssembler::emit_i32x4_mul(LiftoffRegister dst, LiftoffRegister lhs,
3250 LiftoffRegister rhs) {
3251 liftoff::EmitSimdCommutativeBinOp<&Assembler::vpmulld, &Assembler::pmulld>(
3252 this, dst, lhs, rhs, base::Optional<CpuFeature>(SSE4_1));
3253 }
3254
// wasm i32x4.min_s: lane-wise signed minimum (AVX vpminsd / SSE4.1 pminsd).
void LiftoffAssembler::emit_i32x4_min_s(LiftoffRegister dst,
                                        LiftoffRegister lhs,
                                        LiftoffRegister rhs) {
  liftoff::EmitSimdCommutativeBinOp<&Assembler::vpminsd, &Assembler::pminsd>(
      this, dst, lhs, rhs, base::Optional<CpuFeature>(SSE4_1));
}
3261
// wasm i32x4.min_u: lane-wise unsigned minimum (AVX vpminud / SSE4.1 pminud).
void LiftoffAssembler::emit_i32x4_min_u(LiftoffRegister dst,
                                        LiftoffRegister lhs,
                                        LiftoffRegister rhs) {
  liftoff::EmitSimdCommutativeBinOp<&Assembler::vpminud, &Assembler::pminud>(
      this, dst, lhs, rhs, base::Optional<CpuFeature>(SSE4_1));
}
3268
// wasm i32x4.max_s: lane-wise signed maximum (AVX vpmaxsd / SSE4.1 pmaxsd).
void LiftoffAssembler::emit_i32x4_max_s(LiftoffRegister dst,
                                        LiftoffRegister lhs,
                                        LiftoffRegister rhs) {
  liftoff::EmitSimdCommutativeBinOp<&Assembler::vpmaxsd, &Assembler::pmaxsd>(
      this, dst, lhs, rhs, base::Optional<CpuFeature>(SSE4_1));
}
3275
// wasm i32x4.max_u: lane-wise unsigned maximum (AVX vpmaxud / SSE4.1 pmaxud).
void LiftoffAssembler::emit_i32x4_max_u(LiftoffRegister dst,
                                        LiftoffRegister lhs,
                                        LiftoffRegister rhs) {
  liftoff::EmitSimdCommutativeBinOp<&Assembler::vpmaxud, &Assembler::pmaxud>(
      this, dst, lhs, rhs, base::Optional<CpuFeature>(SSE4_1));
}
3282
// wasm i32x4.dot_i16x8_s: multiply adjacent signed 16-bit lane pairs and add
// the two products into one 32-bit lane (AVX vpmaddwd / SSE2 pmaddwd).
void LiftoffAssembler::emit_i32x4_dot_i16x8_s(LiftoffRegister dst,
                                              LiftoffRegister lhs,
                                              LiftoffRegister rhs) {
  liftoff::EmitSimdCommutativeBinOp<&Assembler::vpmaddwd, &Assembler::pmaddwd>(
      this, dst, lhs, rhs);
}
3289
// wasm i32x4.extadd_pairwise_i16x8_s: delegates to the macro-assembler
// sequence; passes the GP scratch register as its temp.
void LiftoffAssembler::emit_i32x4_extadd_pairwise_i16x8_s(LiftoffRegister dst,
                                                          LiftoffRegister src) {
  I32x4ExtAddPairwiseI16x8S(dst.fp(), src.fp(), kScratchRegister);
}
3294
// wasm i32x4.extadd_pairwise_i16x8_u: delegates to the macro-assembler
// sequence; the unsigned variant needs an XMM scratch instead of a GP one.
void LiftoffAssembler::emit_i32x4_extadd_pairwise_i16x8_u(LiftoffRegister dst,
                                                          LiftoffRegister src) {
  I32x4ExtAddPairwiseI16x8U(dst.fp(), src.fp(), kScratchDoubleReg);
}
3299
namespace liftoff {
// Helper function to check for register aliasing, AVX support, and moves
// registers around before calling the actual macro-assembler function.
// I32x4ExtMul requires dst == src1 when AVX is unavailable; this shim
// satisfies that constraint with at most one movaps, exploiting that
// extended multiplication is commutative when dst aliases src2.
inline void I32x4ExtMulHelper(LiftoffAssembler* assm, XMMRegister dst,
                              XMMRegister src1, XMMRegister src2, bool low,
                              bool is_signed) {
  // I32x4ExtMul requires dst == src1 if AVX is not supported.
  if (CpuFeatures::IsSupported(AVX) || dst == src1) {
    // AVX has three-operand forms, or dst already aliases src1: no move.
    assm->I32x4ExtMul(dst, src1, src2, kScratchDoubleReg, low, is_signed);
  } else if (dst != src2) {
    // dst != src1 && dst != src2: copying src1 into dst is safe.
    assm->movaps(dst, src1);
    assm->I32x4ExtMul(dst, dst, src2, kScratchDoubleReg, low, is_signed);
  } else {
    // dst == src2: copying src1 would clobber src2, so copy src2 instead.
    // Extended multiplication is commutative, so operand order can swap.
    assm->movaps(dst, src2);
    assm->I32x4ExtMul(dst, dst, src1, kScratchDoubleReg, low, is_signed);
  }
}
}  // namespace liftoff
3321
// wasm i32x4.extmul_low_i16x8_s: widening multiply of the low signed 16-bit
// lanes, via the aliasing-aware helper above.
void LiftoffAssembler::emit_i32x4_extmul_low_i16x8_s(LiftoffRegister dst,
                                                     LiftoffRegister src1,
                                                     LiftoffRegister src2) {
  liftoff::I32x4ExtMulHelper(this, dst.fp(), src1.fp(), src2.fp(), /*low=*/true,
                             /*is_signed=*/true);
}
3328
// wasm i32x4.extmul_low_i16x8_u: widening multiply of the low unsigned
// 16-bit lanes, via the aliasing-aware helper above.
void LiftoffAssembler::emit_i32x4_extmul_low_i16x8_u(LiftoffRegister dst,
                                                     LiftoffRegister src1,
                                                     LiftoffRegister src2) {
  liftoff::I32x4ExtMulHelper(this, dst.fp(), src1.fp(), src2.fp(), /*low=*/true,
                             /*is_signed=*/false);
}
3335
// wasm i32x4.extmul_high_i16x8_s: widening multiply of the high signed
// 16-bit lanes, via the aliasing-aware helper above.
void LiftoffAssembler::emit_i32x4_extmul_high_i16x8_s(LiftoffRegister dst,
                                                      LiftoffRegister src1,
                                                      LiftoffRegister src2) {
  liftoff::I32x4ExtMulHelper(this, dst.fp(), src1.fp(), src2.fp(),
                             /*low=*/false,
                             /*is_signed=*/true);
}
3343
// wasm i32x4.extmul_high_i16x8_u: widening multiply of the high unsigned
// 16-bit lanes, via the aliasing-aware helper above.
void LiftoffAssembler::emit_i32x4_extmul_high_i16x8_u(LiftoffRegister dst,
                                                      LiftoffRegister src1,
                                                      LiftoffRegister src2) {
  liftoff::I32x4ExtMulHelper(this, dst.fp(), src1.fp(), src2.fp(),
                             /*low=*/false,
                             /*is_signed=*/false);
}
3351
// wasm i64x2.neg: lane-wise two's-complement negation; the macro-assembler
// sequence uses the XMM scratch register.
void LiftoffAssembler::emit_i64x2_neg(LiftoffRegister dst,
                                      LiftoffRegister src) {
  I64x2Neg(dst.fp(), src.fp(), kScratchDoubleReg);
}
3356
// wasm i64x2.all_true: sets dst.gp() to 1 iff both 64-bit lanes are nonzero.
// Uses the shared all-true helper with Pcmpeqq, which needs SSE4_1.
void LiftoffAssembler::emit_i64x2_alltrue(LiftoffRegister dst,
                                          LiftoffRegister src) {
  liftoff::EmitAllTrue<&TurboAssembler::Pcmpeqq>(this, dst, src, SSE4_1);
}
3361
// wasm i64x2.shl with register count: lane-wise left shift of two 64-bit
// lanes (AVX vpsllq / SSE2 psllq); template arg 6 is the 64-bit lane-width
// parameter of the shared helper.
void LiftoffAssembler::emit_i64x2_shl(LiftoffRegister dst, LiftoffRegister lhs,
                                      LiftoffRegister rhs) {
  liftoff::EmitSimdShiftOp<&Assembler::vpsllq, &Assembler::psllq, 6>(this, dst,
                                                                     lhs, rhs);
}
3367
// wasm i64x2.shl with immediate count (AVX vpsllq / SSE2 psllq).
void LiftoffAssembler::emit_i64x2_shli(LiftoffRegister dst, LiftoffRegister lhs,
                                       int32_t rhs) {
  liftoff::EmitSimdShiftOpImm<&Assembler::vpsllq, &Assembler::psllq, 6>(
      this, dst, lhs, rhs);
}
3373
// wasm i64x2.shr_s with register count: x64 has no 64-bit arithmetic vector
// shift, so this delegates to a macro-assembler sequence that needs two XMM
// scratch registers plus a GP scratch.
void LiftoffAssembler::emit_i64x2_shr_s(LiftoffRegister dst,
                                        LiftoffRegister lhs,
                                        LiftoffRegister rhs) {
  I64x2ShrS(dst.fp(), lhs.fp(), rhs.gp(), kScratchDoubleReg,
            liftoff::kScratchDoubleReg2, kScratchRegister);
}
3380
// wasm i64x2.shr_s with immediate count; the count is masked to the low
// 6 bits (0..63) before delegating to the macro-assembler.
void LiftoffAssembler::emit_i64x2_shri_s(LiftoffRegister dst,
                                         LiftoffRegister lhs, int32_t rhs) {
  I64x2ShrS(dst.fp(), lhs.fp(), rhs & 0x3F, kScratchDoubleReg);
}
3385
// wasm i64x2.shr_u with register count: lane-wise logical right shift
// (AVX vpsrlq / SSE2 psrlq) via the shared shift helper.
void LiftoffAssembler::emit_i64x2_shr_u(LiftoffRegister dst,
                                        LiftoffRegister lhs,
                                        LiftoffRegister rhs) {
  liftoff::EmitSimdShiftOp<&Assembler::vpsrlq, &Assembler::psrlq, 6>(this, dst,
                                                                     lhs, rhs);
}
3392
// wasm i64x2.shr_u with immediate count (AVX vpsrlq / SSE2 psrlq).
void LiftoffAssembler::emit_i64x2_shri_u(LiftoffRegister dst,
                                         LiftoffRegister lhs, int32_t rhs) {
  liftoff::EmitSimdShiftOpImm<&Assembler::vpsrlq, &Assembler::psrlq, 6>(
      this, dst, lhs, rhs);
}
3398
// wasm i64x2.add: lane-wise wrapping 64-bit addition (vpaddq / paddq).
void LiftoffAssembler::emit_i64x2_add(LiftoffRegister dst, LiftoffRegister lhs,
                                      LiftoffRegister rhs) {
  liftoff::EmitSimdCommutativeBinOp<&Assembler::vpaddq, &Assembler::paddq>(
      this, dst, lhs, rhs);
}
3404
// wasm i64x2.sub: lane-wise wrapping 64-bit subtraction (vpsubq / psubq).
void LiftoffAssembler::emit_i64x2_sub(LiftoffRegister dst, LiftoffRegister lhs,
                                      LiftoffRegister rhs) {
  liftoff::EmitSimdNonCommutativeBinOp<&Assembler::vpsubq, &Assembler::psubq>(
      this, dst, lhs, rhs);
}
3410
// wasm i64x2.mul: no single-instruction 64-bit vector multiply exists pre
// AVX-512, so the macro-assembler sequence needs two S128 temporaries.
// Both are allocated from the cache registers, excluding dst/lhs/rhs (and
// tmp1 when picking tmp2) so nothing live is clobbered.
void LiftoffAssembler::emit_i64x2_mul(LiftoffRegister dst, LiftoffRegister lhs,
                                      LiftoffRegister rhs) {
  static constexpr RegClass tmp_rc = reg_class_for(kS128);
  LiftoffRegister tmp1 =
      GetUnusedRegister(tmp_rc, LiftoffRegList::ForRegs(dst, lhs, rhs));
  LiftoffRegister tmp2 =
      GetUnusedRegister(tmp_rc, LiftoffRegList::ForRegs(dst, lhs, rhs, tmp1));
  I64x2Mul(dst.fp(), lhs.fp(), rhs.fp(), tmp1.fp(), tmp2.fp());
}
3420
// wasm i64x2.extmul_low_i32x4_s: widening multiply of the low signed 32-bit
// lanes; handled directly by the macro-assembler (no aliasing shim needed).
void LiftoffAssembler::emit_i64x2_extmul_low_i32x4_s(LiftoffRegister dst,
                                                     LiftoffRegister src1,
                                                     LiftoffRegister src2) {
  I64x2ExtMul(dst.fp(), src1.fp(), src2.fp(), kScratchDoubleReg, /*low=*/true,
              /*is_signed=*/true);
}
3427
// wasm i64x2.extmul_low_i32x4_u: widening multiply of the low unsigned
// 32-bit lanes.
void LiftoffAssembler::emit_i64x2_extmul_low_i32x4_u(LiftoffRegister dst,
                                                     LiftoffRegister src1,
                                                     LiftoffRegister src2) {
  I64x2ExtMul(dst.fp(), src1.fp(), src2.fp(), kScratchDoubleReg, /*low=*/true,
              /*is_signed=*/false);
}
3434
// wasm i64x2.extmul_high_i32x4_s: widening multiply of the high signed
// 32-bit lanes.
void LiftoffAssembler::emit_i64x2_extmul_high_i32x4_s(LiftoffRegister dst,
                                                      LiftoffRegister src1,
                                                      LiftoffRegister src2) {
  I64x2ExtMul(dst.fp(), src1.fp(), src2.fp(), kScratchDoubleReg, /*low=*/false,
              /*is_signed=*/true);
}
3441
// wasm i64x2.extmul_high_i32x4_u: widening multiply of the high unsigned
// 32-bit lanes.
void LiftoffAssembler::emit_i64x2_extmul_high_i32x4_u(LiftoffRegister dst,
                                                      LiftoffRegister src1,
                                                      LiftoffRegister src2) {
  I64x2ExtMul(dst.fp(), src1.fp(), src2.fp(), kScratchDoubleReg, /*low=*/false,
              /*is_signed=*/false);
}
3448
// wasm i64x2.bitmask: movmskpd extracts the sign bit of each 64-bit lane
// into the low two bits of the GP destination.
void LiftoffAssembler::emit_i64x2_bitmask(LiftoffRegister dst,
                                          LiftoffRegister src) {
  Movmskpd(dst.gp(), src.fp());
}
3453
// wasm i64x2.extend_low_i32x4_s: sign-extend the two low 32-bit lanes to
// 64 bits (pmovsxdq).
void LiftoffAssembler::emit_i64x2_sconvert_i32x4_low(LiftoffRegister dst,
                                                     LiftoffRegister src) {
  Pmovsxdq(dst.fp(), src.fp());
}
3458
// wasm i64x2.extend_high_i32x4_s: macro-assembler sequence sign-extending
// the two high 32-bit lanes.
void LiftoffAssembler::emit_i64x2_sconvert_i32x4_high(LiftoffRegister dst,
                                                      LiftoffRegister src) {
  I64x2SConvertI32x4High(dst.fp(), src.fp());
}
3463
// wasm i64x2.extend_low_i32x4_u: zero-extend the two low 32-bit lanes to
// 64 bits (pmovzxdq).
void LiftoffAssembler::emit_i64x2_uconvert_i32x4_low(LiftoffRegister dst,
                                                     LiftoffRegister src) {
  Pmovzxdq(dst.fp(), src.fp());
}
3468
// wasm i64x2.extend_high_i32x4_u: macro-assembler sequence zero-extending
// the two high 32-bit lanes; uses the XMM scratch register.
void LiftoffAssembler::emit_i64x2_uconvert_i32x4_high(LiftoffRegister dst,
                                                      LiftoffRegister src) {
  I64x2UConvertI32x4High(dst.fp(), src.fp(), kScratchDoubleReg);
}
3473
// wasm f32x4.abs: clears each lane's sign bit; the macro-assembler uses the
// GP scratch register to materialize the mask.
void LiftoffAssembler::emit_f32x4_abs(LiftoffRegister dst,
                                      LiftoffRegister src) {
  Absps(dst.fp(), src.fp(), kScratchRegister);
}
3478
// wasm f32x4.neg: flips each lane's sign bit; GP scratch used for the mask.
void LiftoffAssembler::emit_f32x4_neg(LiftoffRegister dst,
                                      LiftoffRegister src) {
  Negps(dst.fp(), src.fp(), kScratchRegister);
}
3483
// wasm f32x4.sqrt: lane-wise square root (sqrtps).
void LiftoffAssembler::emit_f32x4_sqrt(LiftoffRegister dst,
                                       LiftoffRegister src) {
  Sqrtps(dst.fp(), src.fp());
}
3488
// wasm f32x4.ceil: lane-wise round up (SSE4.1 roundps). Returns true to
// signal the op was emitted; the DCHECK documents that callers only reach
// here after verifying SSE4_1 support.
bool LiftoffAssembler::emit_f32x4_ceil(LiftoffRegister dst,
                                       LiftoffRegister src) {
  DCHECK(CpuFeatures::IsSupported(SSE4_1));
  Roundps(dst.fp(), src.fp(), kRoundUp);
  return true;
}
3495
// wasm f32x4.floor: lane-wise round down (SSE4.1 roundps); see ceil above
// for the return-value/DCHECK convention.
bool LiftoffAssembler::emit_f32x4_floor(LiftoffRegister dst,
                                        LiftoffRegister src) {
  DCHECK(CpuFeatures::IsSupported(SSE4_1));
  Roundps(dst.fp(), src.fp(), kRoundDown);
  return true;
}
3502
// wasm f32x4.trunc: lane-wise round toward zero (SSE4.1 roundps).
bool LiftoffAssembler::emit_f32x4_trunc(LiftoffRegister dst,
                                        LiftoffRegister src) {
  DCHECK(CpuFeatures::IsSupported(SSE4_1));
  Roundps(dst.fp(), src.fp(), kRoundToZero);
  return true;
}
3509
// wasm f32x4.nearest: lane-wise round to nearest (SSE4.1 roundps).
bool LiftoffAssembler::emit_f32x4_nearest_int(LiftoffRegister dst,
                                              LiftoffRegister src) {
  DCHECK(CpuFeatures::IsSupported(SSE4_1));
  Roundps(dst.fp(), src.fp(), kRoundToNearest);
  return true;
}
3516
// wasm f32x4.add: lane-wise float addition (vaddps / addps).
void LiftoffAssembler::emit_f32x4_add(LiftoffRegister dst, LiftoffRegister lhs,
                                      LiftoffRegister rhs) {
  liftoff::EmitSimdCommutativeBinOp<&Assembler::vaddps, &Assembler::addps>(
      this, dst, lhs, rhs);
}
3522
// wasm f32x4.sub: lane-wise float subtraction (vsubps / subps).
void LiftoffAssembler::emit_f32x4_sub(LiftoffRegister dst, LiftoffRegister lhs,
                                      LiftoffRegister rhs) {
  liftoff::EmitSimdNonCommutativeBinOp<&Assembler::vsubps, &Assembler::subps>(
      this, dst, lhs, rhs);
}
3528
// wasm f32x4.mul: lane-wise float multiplication (vmulps / mulps).
void LiftoffAssembler::emit_f32x4_mul(LiftoffRegister dst, LiftoffRegister lhs,
                                      LiftoffRegister rhs) {
  liftoff::EmitSimdCommutativeBinOp<&Assembler::vmulps, &Assembler::mulps>(
      this, dst, lhs, rhs);
}
3534
// wasm f32x4.div: lane-wise float division (vdivps / divps).
void LiftoffAssembler::emit_f32x4_div(LiftoffRegister dst, LiftoffRegister lhs,
                                      LiftoffRegister rhs) {
  liftoff::EmitSimdNonCommutativeBinOp<&Assembler::vdivps, &Assembler::divps>(
      this, dst, lhs, rhs);
}
3540
// wasm f32x4.min: macro-assembler sequence implementing wasm min semantics
// (NaN/-0.0 handling beyond plain minps); uses the XMM scratch register.
void LiftoffAssembler::emit_f32x4_min(LiftoffRegister dst, LiftoffRegister lhs,
                                      LiftoffRegister rhs) {
  F32x4Min(dst.fp(), lhs.fp(), rhs.fp(), kScratchDoubleReg);
}
3545
// wasm f32x4.max: macro-assembler sequence implementing wasm max semantics;
// uses the XMM scratch register.
void LiftoffAssembler::emit_f32x4_max(LiftoffRegister dst, LiftoffRegister lhs,
                                      LiftoffRegister rhs) {
  F32x4Max(dst.fp(), lhs.fp(), rhs.fp(), kScratchDoubleReg);
}
3550
// wasm f32x4.pmin: pseudo-minimum, defined as rhs < lhs ? rhs : lhs.
// Due to the way minps works (returns the second operand on NaN/equal),
// pmin(a, b) = minps(b, a) — hence rhs and lhs are swapped below.
void LiftoffAssembler::emit_f32x4_pmin(LiftoffRegister dst, LiftoffRegister lhs,
                                       LiftoffRegister rhs) {
  liftoff::EmitSimdNonCommutativeBinOp<&Assembler::vminps, &Assembler::minps>(
      this, dst, rhs, lhs);
}
3557
// wasm f32x4.pmax: pseudo-maximum, defined as lhs < rhs ? rhs : lhs.
// Due to the way maxps works, pmax(a, b) = maxps(b, a) — operands swapped.
void LiftoffAssembler::emit_f32x4_pmax(LiftoffRegister dst, LiftoffRegister lhs,
                                       LiftoffRegister rhs) {
  liftoff::EmitSimdNonCommutativeBinOp<&Assembler::vmaxps, &Assembler::maxps>(
      this, dst, rhs, lhs);
}
3564
// wasm f64x2.abs: clears each 64-bit lane's sign bit; GP scratch for mask.
void LiftoffAssembler::emit_f64x2_abs(LiftoffRegister dst,
                                      LiftoffRegister src) {
  Abspd(dst.fp(), src.fp(), kScratchRegister);
}
3569
// wasm f64x2.neg: flips each 64-bit lane's sign bit; GP scratch for mask.
void LiftoffAssembler::emit_f64x2_neg(LiftoffRegister dst,
                                      LiftoffRegister src) {
  Negpd(dst.fp(), src.fp(), kScratchRegister);
}
3574
// wasm f64x2.sqrt: lane-wise double square root (sqrtpd).
void LiftoffAssembler::emit_f64x2_sqrt(LiftoffRegister dst,
                                       LiftoffRegister src) {
  Sqrtpd(dst.fp(), src.fp());
}
3579
// wasm f64x2.ceil: lane-wise round up (SSE4.1 roundpd); returns true to
// signal the sequence was emitted (SSE4_1 verified by caller, DCHECK only).
bool LiftoffAssembler::emit_f64x2_ceil(LiftoffRegister dst,
                                       LiftoffRegister src) {
  DCHECK(CpuFeatures::IsSupported(SSE4_1));
  Roundpd(dst.fp(), src.fp(), kRoundUp);
  return true;
}
3586
// wasm f64x2.floor: lane-wise round down (SSE4.1 roundpd).
bool LiftoffAssembler::emit_f64x2_floor(LiftoffRegister dst,
                                        LiftoffRegister src) {
  DCHECK(CpuFeatures::IsSupported(SSE4_1));
  Roundpd(dst.fp(), src.fp(), kRoundDown);
  return true;
}
3593
// wasm f64x2.trunc: lane-wise round toward zero (SSE4.1 roundpd).
bool LiftoffAssembler::emit_f64x2_trunc(LiftoffRegister dst,
                                        LiftoffRegister src) {
  DCHECK(CpuFeatures::IsSupported(SSE4_1));
  Roundpd(dst.fp(), src.fp(), kRoundToZero);
  return true;
}
3600
// wasm f64x2.nearest: lane-wise round to nearest (SSE4.1 roundpd).
bool LiftoffAssembler::emit_f64x2_nearest_int(LiftoffRegister dst,
                                              LiftoffRegister src) {
  DCHECK(CpuFeatures::IsSupported(SSE4_1));
  Roundpd(dst.fp(), src.fp(), kRoundToNearest);
  return true;
}
3607
// wasm f64x2.add: lane-wise double addition (vaddpd / addpd).
void LiftoffAssembler::emit_f64x2_add(LiftoffRegister dst, LiftoffRegister lhs,
                                      LiftoffRegister rhs) {
  liftoff::EmitSimdCommutativeBinOp<&Assembler::vaddpd, &Assembler::addpd>(
      this, dst, lhs, rhs);
}
3613
// wasm f64x2.sub: lane-wise double subtraction (vsubpd / subpd).
void LiftoffAssembler::emit_f64x2_sub(LiftoffRegister dst, LiftoffRegister lhs,
                                      LiftoffRegister rhs) {
  liftoff::EmitSimdNonCommutativeBinOp<&Assembler::vsubpd, &Assembler::subpd>(
      this, dst, lhs, rhs);
}
3619
// wasm f64x2.mul: lane-wise double multiplication (vmulpd / mulpd).
void LiftoffAssembler::emit_f64x2_mul(LiftoffRegister dst, LiftoffRegister lhs,
                                      LiftoffRegister rhs) {
  liftoff::EmitSimdCommutativeBinOp<&Assembler::vmulpd, &Assembler::mulpd>(
      this, dst, lhs, rhs);
}
3625
// wasm f64x2.div: lane-wise double division (vdivpd / divpd).
void LiftoffAssembler::emit_f64x2_div(LiftoffRegister dst, LiftoffRegister lhs,
                                      LiftoffRegister rhs) {
  liftoff::EmitSimdNonCommutativeBinOp<&Assembler::vdivpd, &Assembler::divpd>(
      this, dst, lhs, rhs);
}
3631
// wasm f64x2.min: macro-assembler sequence implementing wasm min semantics;
// uses the XMM scratch register.
void LiftoffAssembler::emit_f64x2_min(LiftoffRegister dst, LiftoffRegister lhs,
                                      LiftoffRegister rhs) {
  F64x2Min(dst.fp(), lhs.fp(), rhs.fp(), kScratchDoubleReg);
}
3636
// wasm f64x2.max: macro-assembler sequence implementing wasm max semantics;
// uses the XMM scratch register.
void LiftoffAssembler::emit_f64x2_max(LiftoffRegister dst, LiftoffRegister lhs,
                                      LiftoffRegister rhs) {
  F64x2Max(dst.fp(), lhs.fp(), rhs.fp(), kScratchDoubleReg);
}
3641
// wasm f64x2.pmin: pseudo-minimum.
// Due to the way minpd works, pmin(a, b) = minpd(b, a) — operands swapped.
void LiftoffAssembler::emit_f64x2_pmin(LiftoffRegister dst, LiftoffRegister lhs,
                                       LiftoffRegister rhs) {
  liftoff::EmitSimdNonCommutativeBinOp<&Assembler::vminpd, &Assembler::minpd>(
      this, dst, rhs, lhs);
}
3648
// wasm f64x2.pmax: pseudo-maximum.
// Due to the way maxpd works, pmax(a, b) = maxpd(b, a) — operands swapped.
void LiftoffAssembler::emit_f64x2_pmax(LiftoffRegister dst, LiftoffRegister lhs,
                                       LiftoffRegister rhs) {
  liftoff::EmitSimdNonCommutativeBinOp<&Assembler::vmaxpd, &Assembler::maxpd>(
      this, dst, rhs, lhs);
}
3655
// wasm f64x2.convert_low_i32x4_s: convert the two low signed 32-bit lanes
// to doubles (cvtdq2pd).
void LiftoffAssembler::emit_f64x2_convert_low_i32x4_s(LiftoffRegister dst,
                                                      LiftoffRegister src) {
  Cvtdq2pd(dst.fp(), src.fp());
}
3660
// wasm f64x2.convert_low_i32x4_u: unsigned variant has no direct
// instruction; macro-assembler sequence uses the GP scratch register.
void LiftoffAssembler::emit_f64x2_convert_low_i32x4_u(LiftoffRegister dst,
                                                      LiftoffRegister src) {
  F64x2ConvertLowI32x4U(dst.fp(), src.fp(), kScratchRegister);
}
3665
// wasm f64x2.promote_low_f32x4: widen the two low f32 lanes to f64
// (cvtps2pd).
void LiftoffAssembler::emit_f64x2_promote_low_f32x4(LiftoffRegister dst,
                                                    LiftoffRegister src) {
  Cvtps2pd(dst.fp(), src.fp());
}
3670
// wasm i32x4.trunc_sat_f32x4_s: saturating float->signed-int conversion;
// macro-assembler sequence needs an XMM and a GP scratch.
void LiftoffAssembler::emit_i32x4_sconvert_f32x4(LiftoffRegister dst,
                                                 LiftoffRegister src) {
  I32x4SConvertF32x4(dst.fp(), src.fp(), kScratchDoubleReg, kScratchRegister);
}
3675
// wasm i32x4.trunc_sat_f32x4_u: saturating float->unsigned-int conversion.
// x64 has no unsigned packed conversion, so this splits each lane into a
// convertible part plus an overflow correction, using both XMM scratch
// registers. The statement order below is load-bearing.
void LiftoffAssembler::emit_i32x4_uconvert_f32x4(LiftoffRegister dst,
                                                 LiftoffRegister src) {
  // NAN->0, negative->0: clamp the input to >= 0.0f first.
  Pxor(kScratchDoubleReg, kScratchDoubleReg);
  if (CpuFeatures::IsSupported(AVX)) {
    CpuFeatureScope scope(this, AVX);
    vmaxps(dst.fp(), src.fp(), kScratchDoubleReg);
  } else {
    if (dst.fp() != src.fp()) movaps(dst.fp(), src.fp());
    maxps(dst.fp(), kScratchDoubleReg);
  }
  // scratch: float representation of max_signed.
  Pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
  Psrld(kScratchDoubleReg, uint8_t{1});            // 0x7fffffff
  Cvtdq2ps(kScratchDoubleReg, kScratchDoubleReg);  // 0x4f000000
  // scratch2: convert (src-max_signed).
  // Set positive overflow lanes to 0x7FFFFFFF.
  // Set negative lanes to 0.
  if (CpuFeatures::IsSupported(AVX)) {
    CpuFeatureScope scope(this, AVX);
    vsubps(liftoff::kScratchDoubleReg2, dst.fp(), kScratchDoubleReg);
  } else {
    movaps(liftoff::kScratchDoubleReg2, dst.fp());
    subps(liftoff::kScratchDoubleReg2, kScratchDoubleReg);
  }
  Cmpleps(kScratchDoubleReg, liftoff::kScratchDoubleReg2);
  Cvttps2dq(liftoff::kScratchDoubleReg2, liftoff::kScratchDoubleReg2);
  Pxor(liftoff::kScratchDoubleReg2, kScratchDoubleReg);
  Pxor(kScratchDoubleReg, kScratchDoubleReg);
  Pmaxsd(liftoff::kScratchDoubleReg2, kScratchDoubleReg);
  // Convert to int. Overflow lanes above max_signed will be 0x80000000.
  Cvttps2dq(dst.fp(), dst.fp());
  // Add (src-max_signed) for overflow lanes.
  Paddd(dst.fp(), liftoff::kScratchDoubleReg2);
}
3711
// wasm f32x4.convert_i32x4_s: signed int -> float conversion (cvtdq2ps).
void LiftoffAssembler::emit_f32x4_sconvert_i32x4(LiftoffRegister dst,
                                                 LiftoffRegister src) {
  Cvtdq2ps(dst.fp(), src.fp());
}
3716
// wasm f32x4.convert_i32x4_u: unsigned int -> float. No direct instruction
// exists, so each lane is split into low and high 16-bit halves, each half
// is converted exactly, and the pieces are recombined (a single rounding at
// the final add). The step comments below document the recombination.
void LiftoffAssembler::emit_f32x4_uconvert_i32x4(LiftoffRegister dst,
                                                 LiftoffRegister src) {
  Pxor(kScratchDoubleReg, kScratchDoubleReg);           // Zeros.
  Pblendw(kScratchDoubleReg, src.fp(), uint8_t{0x55});  // Get lo 16 bits.
  if (CpuFeatures::IsSupported(AVX)) {
    CpuFeatureScope scope(this, AVX);
    vpsubd(dst.fp(), src.fp(), kScratchDoubleReg);  // Get hi 16 bits.
  } else {
    if (dst.fp() != src.fp()) movaps(dst.fp(), src.fp());
    psubd(dst.fp(), kScratchDoubleReg);
  }
  Cvtdq2ps(kScratchDoubleReg, kScratchDoubleReg);  // Convert lo exactly.
  Psrld(dst.fp(), byte{1});    // Divide by 2 to get in unsigned range.
  Cvtdq2ps(dst.fp(), dst.fp());       // Convert hi, exactly.
  Addps(dst.fp(), dst.fp());          // Double hi, exactly.
  Addps(dst.fp(), kScratchDoubleReg);  // Add hi and lo, may round.
}
3734
// wasm f32x4.demote_f64x2_zero: narrow both f64 lanes to f32 (cvtpd2ps);
// cvtpd2ps zeroes the upper half of the destination, giving the _zero part.
void LiftoffAssembler::emit_f32x4_demote_f64x2_zero(LiftoffRegister dst,
                                                    LiftoffRegister src) {
  Cvtpd2ps(dst.fp(), src.fp());
}
3739
// wasm i8x16.narrow_i16x8_s: pack 16-bit lanes of both inputs into 8-bit
// lanes with signed saturation (vpacksswb / packsswb).
void LiftoffAssembler::emit_i8x16_sconvert_i16x8(LiftoffRegister dst,
                                                 LiftoffRegister lhs,
                                                 LiftoffRegister rhs) {
  liftoff::EmitSimdNonCommutativeBinOp<&Assembler::vpacksswb,
                                       &Assembler::packsswb>(this, dst, lhs,
                                                             rhs);
}
3747
// wasm i8x16.narrow_i16x8_u: pack 16-bit lanes into 8-bit lanes with
// unsigned saturation (vpackuswb / packuswb).
void LiftoffAssembler::emit_i8x16_uconvert_i16x8(LiftoffRegister dst,
                                                 LiftoffRegister lhs,
                                                 LiftoffRegister rhs) {
  liftoff::EmitSimdNonCommutativeBinOp<&Assembler::vpackuswb,
                                       &Assembler::packuswb>(this, dst, lhs,
                                                             rhs);
}
3755
// wasm i16x8.narrow_i32x4_s: pack 32-bit lanes into 16-bit lanes with
// signed saturation (vpackssdw / packssdw).
void LiftoffAssembler::emit_i16x8_sconvert_i32x4(LiftoffRegister dst,
                                                 LiftoffRegister lhs,
                                                 LiftoffRegister rhs) {
  liftoff::EmitSimdNonCommutativeBinOp<&Assembler::vpackssdw,
                                       &Assembler::packssdw>(this, dst, lhs,
                                                             rhs);
}
3763
// wasm i16x8.narrow_i32x4_u: pack 32-bit lanes into 16-bit lanes with
// unsigned saturation (vpackusdw / SSE4.1 packusdw).
void LiftoffAssembler::emit_i16x8_uconvert_i32x4(LiftoffRegister dst,
                                                 LiftoffRegister lhs,
                                                 LiftoffRegister rhs) {
  liftoff::EmitSimdNonCommutativeBinOp<&Assembler::vpackusdw,
                                       &Assembler::packusdw>(this, dst, lhs,
                                                             rhs, SSE4_1);
}
3771
// wasm i16x8.extend_low_i8x16_s: sign-extend low eight 8-bit lanes
// (pmovsxbw).
void LiftoffAssembler::emit_i16x8_sconvert_i8x16_low(LiftoffRegister dst,
                                                     LiftoffRegister src) {
  Pmovsxbw(dst.fp(), src.fp());
}
3776
// wasm i16x8.extend_high_i8x16_s: macro-assembler sequence sign-extending
// the high eight 8-bit lanes.
void LiftoffAssembler::emit_i16x8_sconvert_i8x16_high(LiftoffRegister dst,
                                                      LiftoffRegister src) {
  I16x8SConvertI8x16High(dst.fp(), src.fp());
}
3781
// wasm i16x8.extend_low_i8x16_u: zero-extend low eight 8-bit lanes
// (pmovzxbw).
void LiftoffAssembler::emit_i16x8_uconvert_i8x16_low(LiftoffRegister dst,
                                                     LiftoffRegister src) {
  Pmovzxbw(dst.fp(), src.fp());
}
3786
// wasm i16x8.extend_high_i8x16_u: macro-assembler sequence zero-extending
// the high eight 8-bit lanes; uses the XMM scratch register.
void LiftoffAssembler::emit_i16x8_uconvert_i8x16_high(LiftoffRegister dst,
                                                      LiftoffRegister src) {
  I16x8UConvertI8x16High(dst.fp(), src.fp(), kScratchDoubleReg);
}
3791
// wasm i32x4.extend_low_i16x8_s: sign-extend low four 16-bit lanes
// (pmovsxwd).
void LiftoffAssembler::emit_i32x4_sconvert_i16x8_low(LiftoffRegister dst,
                                                     LiftoffRegister src) {
  Pmovsxwd(dst.fp(), src.fp());
}
3796
// wasm i32x4.extend_high_i16x8_s: macro-assembler sequence sign-extending
// the high four 16-bit lanes.
void LiftoffAssembler::emit_i32x4_sconvert_i16x8_high(LiftoffRegister dst,
                                                      LiftoffRegister src) {
  I32x4SConvertI16x8High(dst.fp(), src.fp());
}
3801
// wasm i32x4.extend_low_i16x8_u: zero-extend low four 16-bit lanes
// (pmovzxwd).
void LiftoffAssembler::emit_i32x4_uconvert_i16x8_low(LiftoffRegister dst,
                                                     LiftoffRegister src) {
  Pmovzxwd(dst.fp(), src.fp());
}
3806
// wasm i32x4.extend_high_i16x8_u: macro-assembler sequence zero-extending
// the high four 16-bit lanes; uses the XMM scratch register.
void LiftoffAssembler::emit_i32x4_uconvert_i16x8_high(LiftoffRegister dst,
                                                      LiftoffRegister src) {
  I32x4UConvertI16x8High(dst.fp(), src.fp(), kScratchDoubleReg);
}
3811
// wasm i32x4.trunc_sat_f64x2_s_zero: saturating f64->i32 of both lanes,
// upper two result lanes zero; needs an XMM and a GP scratch.
void LiftoffAssembler::emit_i32x4_trunc_sat_f64x2_s_zero(LiftoffRegister dst,
                                                         LiftoffRegister src) {
  I32x4TruncSatF64x2SZero(dst.fp(), src.fp(), kScratchDoubleReg,
                          kScratchRegister);
}
3817
// wasm i32x4.trunc_sat_f64x2_u_zero: unsigned variant of the above.
void LiftoffAssembler::emit_i32x4_trunc_sat_f64x2_u_zero(LiftoffRegister dst,
                                                         LiftoffRegister src) {
  I32x4TruncSatF64x2UZero(dst.fp(), src.fp(), kScratchDoubleReg,
                          kScratchRegister);
}
3823
// wasm v128.andnot: computes lhs & ~rhs. andnps computes ~op0 & op1, so
// rhs and lhs are passed swapped to the non-commutative helper.
void LiftoffAssembler::emit_s128_and_not(LiftoffRegister dst,
                                         LiftoffRegister lhs,
                                         LiftoffRegister rhs) {
  liftoff::EmitSimdNonCommutativeBinOp<&Assembler::vandnps, &Assembler::andnps>(
      this, dst, rhs, lhs);
}
3830
// wasm i8x16.avgr_u: unsigned rounding average, (a + b + 1) >> 1, per lane
// (vpavgb / pavgb).
void LiftoffAssembler::emit_i8x16_rounding_average_u(LiftoffRegister dst,
                                                     LiftoffRegister lhs,
                                                     LiftoffRegister rhs) {
  liftoff::EmitSimdCommutativeBinOp<&Assembler::vpavgb, &Assembler::pavgb>(
      this, dst, lhs, rhs);
}
3837
// wasm i16x8.avgr_u: unsigned rounding average per 16-bit lane
// (vpavgw / pavgw).
void LiftoffAssembler::emit_i16x8_rounding_average_u(LiftoffRegister dst,
                                                     LiftoffRegister lhs,
                                                     LiftoffRegister rhs) {
  liftoff::EmitSimdCommutativeBinOp<&Assembler::vpavgw, &Assembler::pavgw>(
      this, dst, lhs, rhs);
}
3844
// wasm i8x16.abs: lane-wise absolute value (pabsb).
void LiftoffAssembler::emit_i8x16_abs(LiftoffRegister dst,
                                      LiftoffRegister src) {
  Pabsb(dst.fp(), src.fp());
}
3849
// wasm i16x8.abs: lane-wise absolute value (pabsw).
void LiftoffAssembler::emit_i16x8_abs(LiftoffRegister dst,
                                      LiftoffRegister src) {
  Pabsw(dst.fp(), src.fp());
}
3854
// wasm i32x4.abs: lane-wise absolute value (pabsd).
void LiftoffAssembler::emit_i32x4_abs(LiftoffRegister dst,
                                      LiftoffRegister src) {
  Pabsd(dst.fp(), src.fp());
}
3859
// wasm i64x2.abs: no 64-bit pabs instruction pre AVX-512; macro-assembler
// sequence uses the XMM scratch register.
void LiftoffAssembler::emit_i64x2_abs(LiftoffRegister dst,
                                      LiftoffRegister src) {
  I64x2Abs(dst.fp(), src.fp(), kScratchDoubleReg);
}
3864
// wasm i8x16.extract_lane_s: pextrb zero-extends into the GP register, so a
// movsxbl follows to sign-extend the extracted byte to 32 bits.
void LiftoffAssembler::emit_i8x16_extract_lane_s(LiftoffRegister dst,
                                                 LiftoffRegister lhs,
                                                 uint8_t imm_lane_idx) {
  Pextrb(dst.gp(), lhs.fp(), imm_lane_idx);
  movsxbl(dst.gp(), dst.gp());
}
3871
// wasm i8x16.extract_lane_u: pextrb already zero-extends, so no further
// extension is needed.
void LiftoffAssembler::emit_i8x16_extract_lane_u(LiftoffRegister dst,
                                                 LiftoffRegister lhs,
                                                 uint8_t imm_lane_idx) {
  Pextrb(dst.gp(), lhs.fp(), imm_lane_idx);
}
3877
// wasm i16x8.extract_lane_s: pextrw zero-extends, so movsxwl sign-extends
// the extracted 16-bit value to 32 bits.
void LiftoffAssembler::emit_i16x8_extract_lane_s(LiftoffRegister dst,
                                                 LiftoffRegister lhs,
                                                 uint8_t imm_lane_idx) {
  Pextrw(dst.gp(), lhs.fp(), imm_lane_idx);
  movsxwl(dst.gp(), dst.gp());
}
3884
// wasm i16x8.extract_lane_u: pextrw already zero-extends.
void LiftoffAssembler::emit_i16x8_extract_lane_u(LiftoffRegister dst,
                                                 LiftoffRegister lhs,
                                                 uint8_t imm_lane_idx) {
  Pextrw(dst.gp(), lhs.fp(), imm_lane_idx);
}
3890
// wasm i32x4.extract_lane: extract one 32-bit lane to a GP register
// (pextrd).
void LiftoffAssembler::emit_i32x4_extract_lane(LiftoffRegister dst,
                                               LiftoffRegister lhs,
                                               uint8_t imm_lane_idx) {
  Pextrd(dst.gp(), lhs.fp(), imm_lane_idx);
}
3896
// wasm i64x2.extract_lane: extract one 64-bit lane to a GP register
// (pextrq); the lane index is cast to the signed type Pextrq expects.
void LiftoffAssembler::emit_i64x2_extract_lane(LiftoffRegister dst,
                                               LiftoffRegister lhs,
                                               uint8_t imm_lane_idx) {
  Pextrq(dst.gp(), lhs.fp(), static_cast<int8_t>(imm_lane_idx));
}
3902
// wasm f32x4.extract_lane: macro-assembler sequence moving one f32 lane
// into an FP register.
void LiftoffAssembler::emit_f32x4_extract_lane(LiftoffRegister dst,
                                               LiftoffRegister lhs,
                                               uint8_t imm_lane_idx) {
  F32x4ExtractLane(dst.fp(), lhs.fp(), imm_lane_idx);
}
3908
// wasm f64x2.extract_lane: macro-assembler sequence moving one f64 lane
// into an FP register.
void LiftoffAssembler::emit_f64x2_extract_lane(LiftoffRegister dst,
                                               LiftoffRegister lhs,
                                               uint8_t imm_lane_idx) {
  F64x2ExtractLane(dst.fp(), lhs.fp(), imm_lane_idx);
}
3914
// wasm i8x16.replace_lane: insert a GP byte into one lane. AVX vpinsrb is
// three-operand; the SSE path (pinsrb needs SSE4_1) first copies src1 into
// dst when they differ, since pinsrb is destructive.
void LiftoffAssembler::emit_i8x16_replace_lane(LiftoffRegister dst,
                                               LiftoffRegister src1,
                                               LiftoffRegister src2,
                                               uint8_t imm_lane_idx) {
  if (CpuFeatures::IsSupported(AVX)) {
    CpuFeatureScope scope(this, AVX);
    vpinsrb(dst.fp(), src1.fp(), src2.gp(), imm_lane_idx);
  } else {
    CpuFeatureScope scope(this, SSE4_1);
    if (dst.fp() != src1.fp()) movaps(dst.fp(), src1.fp());
    pinsrb(dst.fp(), src2.gp(), imm_lane_idx);
  }
}
3928
// wasm i16x8.replace_lane: insert a GP 16-bit value into one lane. Unlike
// the byte/dword/qword variants, pinsrw is baseline SSE2, so the non-AVX
// path needs no SSE4_1 feature scope.
void LiftoffAssembler::emit_i16x8_replace_lane(LiftoffRegister dst,
                                               LiftoffRegister src1,
                                               LiftoffRegister src2,
                                               uint8_t imm_lane_idx) {
  if (CpuFeatures::IsSupported(AVX)) {
    CpuFeatureScope scope(this, AVX);
    vpinsrw(dst.fp(), src1.fp(), src2.gp(), imm_lane_idx);
  } else {
    if (dst.fp() != src1.fp()) movaps(dst.fp(), src1.fp());
    pinsrw(dst.fp(), src2.gp(), imm_lane_idx);
  }
}
3941
emit_i32x4_replace_lane(LiftoffRegister dst,LiftoffRegister src1,LiftoffRegister src2,uint8_t imm_lane_idx)3942 void LiftoffAssembler::emit_i32x4_replace_lane(LiftoffRegister dst,
3943 LiftoffRegister src1,
3944 LiftoffRegister src2,
3945 uint8_t imm_lane_idx) {
3946 if (CpuFeatures::IsSupported(AVX)) {
3947 CpuFeatureScope scope(this, AVX);
3948 vpinsrd(dst.fp(), src1.fp(), src2.gp(), imm_lane_idx);
3949 } else {
3950 CpuFeatureScope scope(this, SSE4_1);
3951 if (dst.fp() != src1.fp()) movaps(dst.fp(), src1.fp());
3952 pinsrd(dst.fp(), src2.gp(), imm_lane_idx);
3953 }
3954 }
3955
emit_i64x2_replace_lane(LiftoffRegister dst,LiftoffRegister src1,LiftoffRegister src2,uint8_t imm_lane_idx)3956 void LiftoffAssembler::emit_i64x2_replace_lane(LiftoffRegister dst,
3957 LiftoffRegister src1,
3958 LiftoffRegister src2,
3959 uint8_t imm_lane_idx) {
3960 if (CpuFeatures::IsSupported(AVX)) {
3961 CpuFeatureScope scope(this, AVX);
3962 vpinsrq(dst.fp(), src1.fp(), src2.gp(), imm_lane_idx);
3963 } else {
3964 CpuFeatureScope scope(this, SSE4_1);
3965 if (dst.fp() != src1.fp()) movaps(dst.fp(), src1.fp());
3966 pinsrq(dst.fp(), src2.gp(), imm_lane_idx);
3967 }
3968 }
3969
emit_f32x4_replace_lane(LiftoffRegister dst,LiftoffRegister src1,LiftoffRegister src2,uint8_t imm_lane_idx)3970 void LiftoffAssembler::emit_f32x4_replace_lane(LiftoffRegister dst,
3971 LiftoffRegister src1,
3972 LiftoffRegister src2,
3973 uint8_t imm_lane_idx) {
3974 if (CpuFeatures::IsSupported(AVX)) {
3975 CpuFeatureScope scope(this, AVX);
3976 vinsertps(dst.fp(), src1.fp(), src2.fp(), (imm_lane_idx << 4) & 0x30);
3977 } else {
3978 CpuFeatureScope scope(this, SSE4_1);
3979 if (dst.fp() != src1.fp()) movaps(dst.fp(), src1.fp());
3980 insertps(dst.fp(), src2.fp(), (imm_lane_idx << 4) & 0x30);
3981 }
3982 }
3983
// Replaces 64-bit float lane |imm_lane_idx| of |src1| with |src2|, writing
// the result to |dst| (delegates to the shared macro-assembler helper).
void LiftoffAssembler::emit_f64x2_replace_lane(LiftoffRegister dst,
                                               LiftoffRegister src1,
                                               LiftoffRegister src2,
                                               uint8_t imm_lane_idx) {
  F64x2ReplaceLane(dst.fp(), src1.fp(), src2.fp(), imm_lane_idx);
}
3990
// Compares rsp against the stack limit stored at |limit_address| and jumps
// to the out-of-line code at |ool_code| when the limit is reached.
void LiftoffAssembler::StackCheck(Label* ool_code, Register limit_address) {
  cmpq(rsp, Operand(limit_address, 0));
  j(below_equal, ool_code);
}
3995
// Calls the zero-argument C trap callback used by tests.
void LiftoffAssembler::CallTrapCallbackForTesting() {
  PrepareCallCFunction(0);
  CallCFunction(ExternalReference::wasm_call_trap_callback_for_testing(), 0);
}
4000
// Emits an abort with the given |reason|; delegates to TurboAssembler.
void LiftoffAssembler::AssertUnreachable(AbortReason reason) {
  TurboAssembler::AssertUnreachable(reason);
}
4004
PushRegisters(LiftoffRegList regs)4005 void LiftoffAssembler::PushRegisters(LiftoffRegList regs) {
4006 LiftoffRegList gp_regs = regs & kGpCacheRegList;
4007 while (!gp_regs.is_empty()) {
4008 LiftoffRegister reg = gp_regs.GetFirstRegSet();
4009 pushq(reg.gp());
4010 gp_regs.clear(reg);
4011 }
4012 LiftoffRegList fp_regs = regs & kFpCacheRegList;
4013 unsigned num_fp_regs = fp_regs.GetNumRegsSet();
4014 if (num_fp_regs) {
4015 AllocateStackSpace(num_fp_regs * kSimd128Size);
4016 unsigned offset = 0;
4017 while (!fp_regs.is_empty()) {
4018 LiftoffRegister reg = fp_regs.GetFirstRegSet();
4019 Movdqu(Operand(rsp, offset), reg.fp());
4020 fp_regs.clear(reg);
4021 offset += kSimd128Size;
4022 }
4023 DCHECK_EQ(offset, num_fp_regs * kSimd128Size);
4024 }
4025 }
4026
PopRegisters(LiftoffRegList regs)4027 void LiftoffAssembler::PopRegisters(LiftoffRegList regs) {
4028 LiftoffRegList fp_regs = regs & kFpCacheRegList;
4029 unsigned fp_offset = 0;
4030 while (!fp_regs.is_empty()) {
4031 LiftoffRegister reg = fp_regs.GetFirstRegSet();
4032 Movdqu(reg.fp(), Operand(rsp, fp_offset));
4033 fp_regs.clear(reg);
4034 fp_offset += kSimd128Size;
4035 }
4036 if (fp_offset) addq(rsp, Immediate(fp_offset));
4037 LiftoffRegList gp_regs = regs & kGpCacheRegList;
4038 while (!gp_regs.is_empty()) {
4039 LiftoffRegister reg = gp_regs.GetLastRegSet();
4040 popq(reg.gp());
4041 gp_regs.clear(reg);
4042 }
4043 }
4044
RecordSpillsInSafepoint(Safepoint & safepoint,LiftoffRegList all_spills,LiftoffRegList ref_spills,int spill_offset)4045 void LiftoffAssembler::RecordSpillsInSafepoint(Safepoint& safepoint,
4046 LiftoffRegList all_spills,
4047 LiftoffRegList ref_spills,
4048 int spill_offset) {
4049 int spill_space_size = 0;
4050 while (!all_spills.is_empty()) {
4051 LiftoffRegister reg = all_spills.GetFirstRegSet();
4052 if (ref_spills.has(reg)) {
4053 safepoint.DefinePointerSlot(spill_offset);
4054 }
4055 all_spills.clear(reg);
4056 ++spill_offset;
4057 spill_space_size += kSystemPointerSize;
4058 }
4059 // Record the number of additional spill slots.
4060 RecordOolSpillSpaceSize(spill_space_size);
4061 }
4062
// Returns from the function while additionally popping |num_stack_slots|
// stack slots (the `ret imm16` form limits the byte count to 16 bits).
void LiftoffAssembler::DropStackSlotsAndRet(uint32_t num_stack_slots) {
  DCHECK_LT(num_stack_slots,
            (1 << 16) / kSystemPointerSize);  // 16 bit immediate
  ret(static_cast<int>(num_stack_slots * kSystemPointerSize));
}
4068
// Calls the C function |ext_ref|. Arguments are spilled into a stack buffer
// of |stack_bytes| bytes, and a pointer to that buffer is passed as the
// single C argument. The (at most one) register return value is moved into
// |rets|; an optional out-argument of kind |out_argument_kind| is loaded
// back from the start of the buffer afterwards.
void LiftoffAssembler::CallC(const ValueKindSig* sig,
                             const LiftoffRegister* args,
                             const LiftoffRegister* rets,
                             ValueKind out_argument_kind, int stack_bytes,
                             ExternalReference ext_ref) {
  AllocateStackSpace(stack_bytes);

  // Store all parameters consecutively into the buffer at rsp.
  int arg_bytes = 0;
  for (ValueKind param_kind : sig->parameters()) {
    liftoff::Store(this, Operand(rsp, arg_bytes), *args++, param_kind);
    arg_bytes += element_size_bytes(param_kind);
  }
  DCHECK_LE(arg_bytes, stack_bytes);

  // Pass a pointer to the buffer with the arguments to the C function.
  movq(arg_reg_1, rsp);

  constexpr int kNumCCallArgs = 1;

  // Now call the C function.
  PrepareCallCFunction(kNumCCallArgs);
  CallCFunction(ext_ref, kNumCCallArgs);

  // Move return value to the right register.
  const LiftoffRegister* next_result_reg = rets;
  if (sig->return_count() > 0) {
    DCHECK_EQ(1, sig->return_count());
    constexpr Register kReturnReg = rax;
    if (kReturnReg != next_result_reg->gp()) {
      Move(*next_result_reg, LiftoffRegister(kReturnReg), sig->GetReturn(0));
    }
    ++next_result_reg;
  }

  // Load potential output value from the buffer on the stack.
  if (out_argument_kind != kVoid) {
    liftoff::Load(this, *next_result_reg, Operand(rsp, 0), out_argument_kind);
  }

  // Release the argument buffer.
  addq(rsp, Immediate(stack_bytes));
}
4110
// Emits a direct (near) call to wasm code at |addr|, tagged with WASM_CALL
// relocation info.
void LiftoffAssembler::CallNativeWasmCode(Address addr) {
  near_call(addr, RelocInfo::WASM_CALL);
}
4114
// Emits a direct (near) jump to wasm code at |addr|, tagged with WASM_CALL
// relocation info — the tail-call variant of CallNativeWasmCode.
void LiftoffAssembler::TailCallNativeWasmCode(Address addr) {
  near_jmp(addr, RelocInfo::WASM_CALL);
}
4118
// Calls the code at |target|. If |target| is no_reg, the target address was
// pushed onto the stack by the caller and is popped into the scratch
// register first.
void LiftoffAssembler::CallIndirect(const ValueKindSig* sig,
                                    compiler::CallDescriptor* call_descriptor,
                                    Register target) {
  if (target == no_reg) {
    popq(kScratchRegister);
    target = kScratchRegister;
  }
  call(target);
}
4128
// Jumps to the code at |target| (tail call). If |target| is no_reg, the
// target address was pushed onto the stack and is popped into the scratch
// register first.
void LiftoffAssembler::TailCallIndirect(Register target) {
  if (target == no_reg) {
    popq(kScratchRegister);
    target = kScratchRegister;
  }
  jmp(target);
}
4136
// Calls the wasm runtime stub identified by |sid|.
void LiftoffAssembler::CallRuntimeStub(WasmCode::RuntimeStubId sid) {
  // A direct call to a wasm runtime stub defined in this module.
  // Just encode the stub index. This will be patched at relocation.
  near_call(static_cast<Address>(sid), RelocInfo::WASM_STUB_CALL);
}
4142
// Reserves |size| bytes of stack space and returns its start address (the
// new rsp) in |addr|.
void LiftoffAssembler::AllocateStackSlot(Register addr, uint32_t size) {
  AllocateStackSpace(size);
  movq(addr, rsp);
}
4147
// Releases |size| bytes of stack space previously reserved via
// AllocateStackSlot.
void LiftoffAssembler::DeallocateStackSlot(uint32_t size) {
  addq(rsp, Immediate(size));
}
4151
// If an on-stack-replacement target has been installed (nonzero OSR target
// slot), jump to the WasmOnStackReplace stub (patched at relocation).
void LiftoffAssembler::MaybeOSR() {
  cmpq(liftoff::GetOSRTargetSlot(), Immediate(0));
  j(not_equal, static_cast<Address>(WasmCode::kWasmOnStackReplace),
    RelocInfo::WASM_STUB_CALL);
}
4157
emit_set_if_nan(Register dst,DoubleRegister src,ValueKind kind)4158 void LiftoffAssembler::emit_set_if_nan(Register dst, DoubleRegister src,
4159 ValueKind kind) {
4160 if (kind == kF32) {
4161 Ucomiss(src, src);
4162 } else {
4163 DCHECK_EQ(kind, kF64);
4164 Ucomisd(src, src);
4165 }
4166 Label ret;
4167 j(parity_odd, &ret);
4168 movl(Operand(dst, 0), Immediate(1));
4169 bind(&ret);
4170 }
4171
// ORs a nonzero value into the word pointed to by |dst| if any float lane of
// the s128 value in |src| is NaN. |tmp_s128| is clobbered with the per-lane
// unordered-compare mask, |tmp_gp| with its byte-wise sign-bit mask.
void LiftoffAssembler::emit_s128_set_if_nan(Register dst, LiftoffRegister src,
                                            Register tmp_gp,
                                            LiftoffRegister tmp_s128,
                                            ValueKind lane_kind) {
  if (lane_kind == kF32) {
    // cmpunordps sets a lane to all-ones iff the compare is unordered,
    // i.e. the lane is NaN (self-comparison).
    movaps(tmp_s128.fp(), src.fp());
    cmpunordps(tmp_s128.fp(), tmp_s128.fp());
  } else {
    DCHECK_EQ(lane_kind, kF64);
    movapd(tmp_s128.fp(), src.fp());
    cmpunordpd(tmp_s128.fp(), tmp_s128.fp());
  }
  // The byte mask is nonzero iff at least one lane compared unordered.
  pmovmskb(tmp_gp, tmp_s128.fp());
  orl(Operand(dst, 0), tmp_gp);
}
4187
// Materializes all recorded stack-parameter slots onto the machine stack for
// an outgoing call. Slots are processed in push order (from the highest
// destination slot down); for each slot, the gap to the previous slot is
// first reserved via AllocateStackSpace, then the value is pushed from its
// source location (stack, register, or constant).
void LiftoffStackSlots::Construct(int param_slots) {
  DCHECK_LT(0, slots_.size());
  SortInPushOrder();
  int last_stack_slot = param_slots;
  for (auto& slot : slots_) {
    const int stack_slot = slot.dst_slot_;
    // Bytes between the end of the previous slot and the end of this one;
    // includes the size of the value pushed below.
    int stack_decrement = (last_stack_slot - stack_slot) * kSystemPointerSize;
    last_stack_slot = stack_slot;
    const LiftoffAssembler::VarState& src = slot.src_;
    DCHECK_LT(0, stack_decrement);
    switch (src.loc()) {
      case LiftoffAssembler::VarState::kStack:
        if (src.kind() == kI32) {
          asm_->AllocateStackSpace(stack_decrement - kSystemPointerSize);
          // Load i32 values to a register first to ensure they are zero
          // extended.
          asm_->movl(kScratchRegister, liftoff::GetStackSlot(slot.src_offset_));
          asm_->pushq(kScratchRegister);
        } else if (src.kind() == kS128) {
          asm_->AllocateStackSpace(stack_decrement - kSimd128Size);
          // Since offsets are subtracted from sp, we need a smaller offset to
          // push the top of a s128 value.
          asm_->pushq(liftoff::GetStackSlot(slot.src_offset_ - 8));
          asm_->pushq(liftoff::GetStackSlot(slot.src_offset_));
        } else {
          asm_->AllocateStackSpace(stack_decrement - kSystemPointerSize);
          // For all other types, just push the whole (8-byte) stack slot.
          // This is also ok for f32 values (even though we copy 4 uninitialized
          // bytes), because f32 and f64 values are clearly distinguished in
          // Turbofan, so the uninitialized bytes are never accessed.
          asm_->pushq(liftoff::GetStackSlot(slot.src_offset_));
        }
        break;
      case LiftoffAssembler::VarState::kRegister: {
        // Registers are pushed by the liftoff helper, which also handles the
        // preceding stack-space decrement.
        int pushed = src.kind() == kS128 ? kSimd128Size : kSystemPointerSize;
        liftoff::push(asm_, src.reg(), src.kind(), stack_decrement - pushed);
        break;
      }
      case LiftoffAssembler::VarState::kIntConst:
        asm_->AllocateStackSpace(stack_decrement - kSystemPointerSize);
        asm_->pushq(Immediate(src.i32_const()));
        break;
    }
  }
}
4233
4234 #undef RETURN_FALSE_IF_MISSING_CPU_FEATURE
4235
4236 } // namespace wasm
4237 } // namespace internal
4238 } // namespace v8
4239
4240 #endif // V8_WASM_BASELINE_X64_LIFTOFF_ASSEMBLER_X64_H_
4241